1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*******************************************************************************
7*
8* File CCONVTST.C
9*
10* Modification History:
11*        Name                     Description
12*    Steven R. Loomis     7/8/1999      Adding input buffer test
13********************************************************************************
14*/
15#include <stdio.h>
16#include "cstring.h"
17#include "unicode/uloc.h"
18#include "unicode/ucnv.h"
19#include "unicode/ucnv_err.h"
20#include "unicode/ucnv_cb.h"
21#include "cintltst.h"
22#include "unicode/utypes.h"
23#include "unicode/ustring.h"
24#include "unicode/ucol.h"
25#include "unicode/utf16.h"
26#include "cmemory.h"
27#include "nucnvtst.h"
28
/* Element count of a true array (not a pointer): whole-array size over the size of one element. */
#define LENGTHOF(array) (sizeof(array) / sizeof(*(array)))
30
31static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
32static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
33#if !UCONFIG_NO_COLLATION
34static void TestJitterbug981(void);
35#endif
36#if !UCONFIG_NO_LEGACY_CONVERSION
37static void TestJitterbug1293(void);
38#endif
39static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
40static void TestConverterTypesAndStarters(void);
41static void TestAmbiguous(void);
42static void TestSignatureDetection(void);
43static void TestUTF7(void);
44static void TestIMAP(void);
45static void TestUTF8(void);
46static void TestCESU8(void);
47static void TestUTF16(void);
48static void TestUTF16BE(void);
49static void TestUTF16LE(void);
50static void TestUTF32(void);
51static void TestUTF32BE(void);
52static void TestUTF32LE(void);
53static void TestLATIN1(void);
54
55#if !UCONFIG_NO_LEGACY_CONVERSION
56static void TestSBCS(void);
57static void TestDBCS(void);
58static void TestMBCS(void);
59#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
60static void TestICCRunout(void);
61#endif
62
63#ifdef U_ENABLE_GENERIC_ISO_2022
64static void TestISO_2022(void);
65#endif
66
67static void TestISO_2022_JP(void);
68static void TestISO_2022_JP_1(void);
69static void TestISO_2022_JP_2(void);
70static void TestISO_2022_KR(void);
71static void TestISO_2022_KR_1(void);
72static void TestISO_2022_CN(void);
73#if 0
74   /*
75    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
76    */
77static void TestISO_2022_CN_EXT(void);
78#endif
79static void TestJIS(void);
80static void TestHZ(void);
81#endif
82
83static void TestSCSU(void);
84
85#if !UCONFIG_NO_LEGACY_CONVERSION
86static void TestEBCDIC_STATEFUL(void);
87static void TestGB18030(void);
88static void TestLMBCS(void);
89static void TestJitterbug255(void);
90static void TestEBCDICUS4XML(void);
91#if 0
92   /*
93    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
94    */
95static void TestJitterbug915(void);
96#endif
97static void TestISCII(void);
98
99static void TestCoverageMBCS(void);
100static void TestJitterbug2346(void);
101static void TestJitterbug2411(void);
102static void TestJB5275(void);
103static void TestJB5275_1(void);
104static void TestJitterbug6175(void);
105
106static void TestIsFixedWidth(void);
107#endif
108
109static void TestInBufSizes(void);
110
111static void TestRoundTrippingAllUTF(void);
112static void TestConv(const uint16_t in[],
113                     int len,
114                     const char* conv,
115                     const char* lang,
116                     char byteArr[],
117                     int byteArrLen);
118
119/* open a converter, using test data if it begins with '@' */
120static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
121
122
/* Capacity, in elements, of the scratch buffers used by the conversion helpers. */
#define NEW_MAX_BUFFER 999

/* Per-call chunk sizes for the conversion helpers; TestNewConvertWithBufferSizes() sets both. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Label written by setNuConvTestName() and appended to log messages. */
static char     gNuConvTestName[1024];

/*
 * Smaller of x and y.  Every argument use and the whole expansion are
 * parenthesized so that operands containing low-precedence operators
 * (for example a|b) keep the grouping the caller wrote.
 * The arguments are still evaluated more than once: pass only
 * side-effect-free expressions.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
130
131static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
132{
133  if(cnv && cnv[0] == '@') {
134    return ucnv_openPackage(loadTestData(err), cnv+1, err);
135  } else {
136    return ucnv_open(cnv, err);
137  }
138}
139
/* Writes len bytes of a to the verbose log as "{0xNN 0xNN ... }" plus a newline. */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
148
149static void printUSeq(const UChar* a, int len)
150{
151    int i=0;
152    log_verbose("{U+");
153    while (i<len) log_verbose("0x%04x ", a[i++]);
154    log_verbose("}\n");
155}
156
/* Writes len bytes of a to stderr as "{0xNN 0xNN ... }" plus a newline. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
165
166static void printUSeqErr(const UChar* a, int len)
167{
168    int i=0;
169    fprintf(stderr, "{U+");
170    while (i<len)
171        fprintf(stderr, "0x%04x ", a[i++]);
172    fprintf(stderr,"}\n");
173}
174
175static void
176TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
177{
178     const char* s0;
179     const char* s=(char*)source;
180     const int32_t *r=results;
181     UErrorCode errorCode=U_ZERO_ERROR;
182     UChar32 c;
183
184     while(s<limit) {
185        s0=s;
186        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
187        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
188            break; /* no more significant input */
189        } else if(U_FAILURE(errorCode)) {
190            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
191            break;
192        } else if(
193            /* test the expected number of input bytes only if >=0 */
194            (*r>=0 && (int32_t)(s-s0)!=*r) ||
195            c!=*(r+1)
196        ) {
197            log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198                message, c, (s-s0), *(r+1), *r);
199            break;
200        }
201        r+=2;
202    }
203}
204
205static void
206TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
207{
208     const char* s=(char*)source;
209     UErrorCode errorCode=U_ZERO_ERROR;
210     uint32_t c;
211     c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
212     if(errorCode != expected){
213        log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
214     }
215     if(c != 0xFFFD && c != 0xffff){
216        log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
217     }
218
219}
220
221static void TestInBufSizes(void)
222{
223  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
224#if 1
225  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
226  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
227  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
228  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
229  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
230  TestNewConvertWithBufferSizes(1,1);
231  TestNewConvertWithBufferSizes(2,3);
232  TestNewConvertWithBufferSizes(3,2);
233#endif
234}
235
236static void TestOutBufSizes(void)
237{
238#if 1
239  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
240  TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
241  TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
242  TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
243  TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
244  TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
245
246#endif
247}
248
249
250void addTestNewConvert(TestNode** root)
251{
252#if !UCONFIG_NO_FILE_IO
253   addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
254   addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
255#endif
256   addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
257   addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
258   addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
259   addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
260   addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
261   addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
262
263   /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
264   addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
265   addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
266   addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
267   addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
268   addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
269   addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
270   addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
271
272#if !UCONFIG_NO_LEGACY_CONVERSION
273   addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
274#endif
275
276   addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
277
278#if !UCONFIG_NO_LEGACY_CONVERSION
279   addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
280#if !UCONFIG_NO_FILE_IO
281   addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
282   addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
283#endif
284   addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
285
286#ifdef U_ENABLE_GENERIC_ISO_2022
287   addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
288#endif
289
290   addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
291   addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
292   addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
293   addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
294   addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
295   addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
296   addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
297   /*
298    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
299   addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
300   addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
301    */
302   addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
303#endif
304
305   addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
306
307#if !UCONFIG_NO_LEGACY_CONVERSION
308   addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
309   addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
310   addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
311   addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
312   addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
313   addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
314   addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
315#if !UCONFIG_NO_COLLATION
316   addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
317#endif
318
319   addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
320#endif
321
322
323#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
324   addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
325#endif
326
327   addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
328
329#if !UCONFIG_NO_LEGACY_CONVERSION
330   addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
331   addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
332   addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
333
334   addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
335#endif
336}
337
338
339/* Note that this test already makes use of statics, so it's not really
340   multithread safe.
341   This convenience function lets us make the error messages actually useful.
342*/
343
344static void setNuConvTestName(const char *codepage, const char *direction)
345{
346    sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
347        codepage,
348        direction,
349        (int)gInBufferSize,
350        (int)gOutBufferSize);
351}
352
/* Outcome reported by the testConvertFromU() / testConvertToU() helpers. */
typedef enum
{
  /* test was OK */
  TC_OK = 0,
  /* Match failed - err was printed */
  TC_MISMATCH,
  /* Test failed, don't print an err because it was already printed. */
  TC_FAIL
} ETestConvertResult;
359
360/* Note: This function uses global variables and it will not do offset
361checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
362static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
363                const char *codepage, const int32_t *expectOffsets , UBool useFallback)
364{
365    UErrorCode status = U_ZERO_ERROR;
366    UConverter *conv = 0;
367    char    junkout[NEW_MAX_BUFFER]; /* FIX */
368    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
369    char *p;
370    const UChar *src;
371    char *end;
372    char *targ;
373    int32_t *offs;
374    int i;
375    int32_t   realBufferSize;
376    char *realBufferEnd;
377    const UChar *realSourceEnd;
378    const UChar *sourceLimit;
379    UBool checkOffsets = TRUE;
380    UBool doFlush;
381
382    for(i=0;i<NEW_MAX_BUFFER;i++)
383        junkout[i] = (char)0xF0;
384    for(i=0;i<NEW_MAX_BUFFER;i++)
385        junokout[i] = 0xFF;
386
387    setNuConvTestName(codepage, "FROM");
388
389    log_verbose("\n=========  %s\n", gNuConvTestName);
390
391    conv = my_ucnv_open(codepage, &status);
392
393    if(U_FAILURE(status))
394    {
395        log_data_err("Couldn't open converter %s\n",codepage);
396        return TC_FAIL;
397    }
398    if(useFallback){
399        ucnv_setFallback(conv,useFallback);
400    }
401
402    log_verbose("Converter opened..\n");
403
404    src = source;
405    targ = junkout;
406    offs = junokout;
407
408    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
409    realBufferEnd = junkout + realBufferSize;
410    realSourceEnd = source + sourceLen;
411
412    if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
413        checkOffsets = FALSE;
414
415    do
416    {
417      end = nct_min(targ + gOutBufferSize, realBufferEnd);
418      sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
419
420      doFlush = (UBool)(sourceLimit == realSourceEnd);
421
422      if(targ == realBufferEnd) {
423        log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
424        return TC_FAIL;
425      }
426      log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
427
428
429      status = U_ZERO_ERROR;
430
431      ucnv_fromUnicode (conv,
432                        &targ,
433                        end,
434                        &src,
435                        sourceLimit,
436                        checkOffsets ? offs : NULL,
437                        doFlush, /* flush if we're at the end of the input data */
438                        &status);
439    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
440
441    if(U_FAILURE(status)) {
442      log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
443      return TC_FAIL;
444    }
445
446    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
447                sourceLen, targ-junkout);
448
449    if(getTestOption(VERBOSITY_OPTION))
450    {
451      char junk[9999];
452      char offset_str[9999];
453      char *ptr;
454
455      junk[0] = 0;
456      offset_str[0] = 0;
457      for(ptr = junkout;ptr<targ;ptr++) {
458        sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
459        sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
460      }
461
462      log_verbose(junk);
463      printSeq((const uint8_t *)expect, expectLen);
464      if ( checkOffsets ) {
465        log_verbose("\nOffsets:");
466        log_verbose(offset_str);
467      }
468      log_verbose("\n");
469    }
470    ucnv_close(conv);
471
472    if(expectLen != targ-junkout) {
473      log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
474      log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
475      fprintf(stderr, "Got:\n");
476      printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
477      fprintf(stderr, "Expected:\n");
478      printSeqErr((const unsigned char*)expect, expectLen);
479      return TC_MISMATCH;
480    }
481
482    if (checkOffsets && (expectOffsets != 0) ) {
483      log_verbose("comparing %d offsets..\n", targ-junkout);
484      if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
485        log_err("did not get the expected offsets. %s\n", gNuConvTestName);
486        printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
487        log_err("\n");
488        log_err("Got  :     ");
489        for(p=junkout;p<targ;p++) {
490          log_err("%d,", junokout[p-junkout]);
491        }
492        log_err("\n");
493        log_err("Expected:  ");
494        for(i=0; i<(targ-junkout); i++) {
495          log_err("%d,", expectOffsets[i]);
496        }
497        log_err("\n");
498      }
499    }
500
501    log_verbose("comparing..\n");
502    if(!memcmp(junkout, expect, expectLen)) {
503      log_verbose("Matches!\n");
504      return TC_OK;
505    } else {
506      log_err("String does not match u->%s\n", gNuConvTestName);
507      printUSeqErr(source, sourceLen);
508      fprintf(stderr, "Got:\n");
509      printSeqErr((const unsigned char *)junkout, expectLen);
510      fprintf(stderr, "Expected:\n");
511      printSeqErr((const unsigned char *)expect, expectLen);
512
513      return TC_MISMATCH;
514    }
515}
516
517/* Note: This function uses global variables and it will not do offset
518checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
519static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
520                                          const char *codepage, const int32_t *expectOffsets, UBool useFallback)
521{
522    UErrorCode status = U_ZERO_ERROR;
523    UConverter *conv = 0;
524    UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
525    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
526    const char *src;
527    const char *realSourceEnd;
528    const char *srcLimit;
529    UChar *p;
530    UChar *targ;
531    UChar *end;
532    int32_t *offs;
533    int i;
534    UBool   checkOffsets = TRUE;
535
536    int32_t   realBufferSize;
537    UChar *realBufferEnd;
538
539
540    for(i=0;i<NEW_MAX_BUFFER;i++)
541        junkout[i] = 0xFFFE;
542
543    for(i=0;i<NEW_MAX_BUFFER;i++)
544        junokout[i] = -1;
545
546    setNuConvTestName(codepage, "TO");
547
548    log_verbose("\n=========  %s\n", gNuConvTestName);
549
550    conv = my_ucnv_open(codepage, &status);
551
552    if(U_FAILURE(status))
553    {
554        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
555        return TC_FAIL;
556    }
557    if(useFallback){
558        ucnv_setFallback(conv,useFallback);
559    }
560    log_verbose("Converter opened..\n");
561
562    src = (const char *)source;
563    targ = junkout;
564    offs = junokout;
565
566    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
567    realBufferEnd = junkout + realBufferSize;
568    realSourceEnd = src + sourcelen;
569
570    if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
571        checkOffsets = FALSE;
572
573    do
574    {
575        end = nct_min( targ + gOutBufferSize, realBufferEnd);
576        srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
577
578        if(targ == realBufferEnd)
579        {
580            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
581            return TC_FAIL;
582        }
583        log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
584
585        /* oldTarg = targ; */
586
587        status = U_ZERO_ERROR;
588
589        ucnv_toUnicode (conv,
590                &targ,
591                end,
592                &src,
593                srcLimit,
594                checkOffsets ? offs : NULL,
595                (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
596                &status);
597
598        /*        offs += (targ-oldTarg); */
599
600      } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
601
602    if(U_FAILURE(status))
603    {
604        log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
605        return TC_FAIL;
606    }
607
608    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
609        sourcelen, targ-junkout);
610    if(getTestOption(VERBOSITY_OPTION))
611    {
612        char junk[9999];
613        char offset_str[9999];
614        UChar *ptr;
615
616        junk[0] = 0;
617        offset_str[0] = 0;
618
619        for(ptr = junkout;ptr<targ;ptr++)
620        {
621            sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
622            sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
623        }
624
625        log_verbose(junk);
626        printUSeq(expect, expectlen);
627        if ( checkOffsets )
628          {
629            log_verbose("\nOffsets:");
630            log_verbose(offset_str);
631          }
632        log_verbose("\n");
633    }
634    ucnv_close(conv);
635
636    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
637
638    if (checkOffsets && (expectOffsets != 0))
639    {
640        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
641            log_err("did not get the expected offsets. %s\n",gNuConvTestName);
642            log_err("Got:      ");
643            for(p=junkout;p<targ;p++) {
644                log_err("%d,", junokout[p-junkout]);
645            }
646            log_err("\n");
647            log_err("Expected: ");
648            for(i=0; i<(targ-junkout); i++) {
649                log_err("%d,", expectOffsets[i]);
650            }
651            log_err("\n");
652            log_err("output:   ");
653            for(i=0; i<(targ-junkout); i++) {
654                log_err("%X,", junkout[i]);
655            }
656            log_err("\n");
657            log_err("input:    ");
658            for(i=0; i<(src-(const char *)source); i++) {
659                log_err("%X,", (unsigned char)source[i]);
660            }
661            log_err("\n");
662        }
663    }
664
665    if(!memcmp(junkout, expect, expectlen*2))
666    {
667        log_verbose("Matches!\n");
668        return TC_OK;
669    }
670    else
671    {
672        log_err("String does not match. %s\n", gNuConvTestName);
673        log_verbose("String does not match. %s\n", gNuConvTestName);
674        printf("\nGot:");
675        printUSeqErr(junkout, expectlen);
676        printf("\nExpected:");
677        printUSeqErr(expect, expectlen);
678        return TC_MISMATCH;
679    }
680}
681
682
683static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
684{
685/** test chars #1 */
686    /*  1 2 3  1Han 2Han 3Han .  */
687    static const UChar   sampleText[] =
688     { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
689    static const UChar sampleTextRoundTripUnmappable[] =
690    { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
691
692
693    static const uint8_t expectedUTF8[] =
694     { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
695    static const int32_t toUTF8Offs[] =
696     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
697    static const int32_t fmUTF8Offs[] =
698     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
699
700#ifdef U_ENABLE_GENERIC_ISO_2022
701    /* Same as UTF8, but with ^[%B preceeding */
702    static const const uint8_t expectedISO2022[] =
703     { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
704    static const int32_t toISO2022Offs[]     =
705     { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
706       0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
707    static const int32_t fmISO2022Offs[] =
708     { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
709#endif
710
711    /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
712    static const uint8_t expectedIBM930[] =
713     { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
714    static const int32_t toIBM930Offs[] =
715     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
716    static const int32_t fmIBM930Offs[] =
717     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
718
719    /* 1 2 3 0 h1 h2 h3 . MBCS*/
720    static const uint8_t expectedIBM943[] =
721     {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
722    static const int32_t toIBM943Offs    [] =
723     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
724    static const int32_t fmIBM943Offs[] =
725     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
726
727    /* 1 2 3 0 h1 h2 h3 . DBCS*/
728    static const uint8_t expectedIBM9027[] =
729     {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
730    static const int32_t toIBM9027Offs    [] =
731     {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
732
733     /* 1 2 3 0 <?> <?> <?> . SBCS*/
734    static const uint8_t expectedIBM920[] =
735     {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
736    static const int32_t toIBM920Offs    [] =
737     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
738
739    /* 1 2 3 0 <?> <?> <?> . SBCS*/
740    static const uint8_t expectedISO88593[] =
741     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
742    static const int32_t toISO88593Offs[]     =
743     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
744
745    /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
746    static const uint8_t expectedLATIN1[] =
747     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
748    static const int32_t toLATIN1Offs[]     =
749     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
750
751
752    /*  etc */
753    static const uint8_t expectedUTF16BE[] =
754     { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
755    static const int32_t toUTF16BEOffs[]=
756     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
757    static const int32_t fmUTF16BEOffs[] =
758     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e, 0x0010, 0x0010 };
759
760    static const uint8_t expectedUTF16LE[] =
761     { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
762    static const int32_t toUTF16LEOffs[]=
763     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
764    static const int32_t fmUTF16LEOffs[] =
765     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
766
767    static const uint8_t expectedUTF32BE[] =
768     { 0x00, 0x00, 0x00, 0x31,
769       0x00, 0x00, 0x00, 0x32,
770       0x00, 0x00, 0x00, 0x33,
771       0x00, 0x00, 0x00, 0x00,
772       0x00, 0x00, 0x4e, 0x00,
773       0x00, 0x00, 0x4e, 0x8c,
774       0x00, 0x00, 0x4e, 0x09,
775       0x00, 0x00, 0x00, 0x2e,
776       0x00, 0x02, 0x00, 0x21 };
777    static const int32_t toUTF32BEOffs[]=
778     { 0x00, 0x00, 0x00, 0x00,
779       0x01, 0x01, 0x01, 0x01,
780       0x02, 0x02, 0x02, 0x02,
781       0x03, 0x03, 0x03, 0x03,
782       0x04, 0x04, 0x04, 0x04,
783       0x05, 0x05, 0x05, 0x05,
784       0x06, 0x06, 0x06, 0x06,
785       0x07, 0x07, 0x07, 0x07,
786       0x08, 0x08, 0x08, 0x08,
787       0x08, 0x08, 0x08, 0x08 };
788    static const int32_t fmUTF32BEOffs[] =
789     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c, 0x0020, 0x0020 };
790
791    static const uint8_t expectedUTF32LE[] =
792     { 0x31, 0x00, 0x00, 0x00,
793       0x32, 0x00, 0x00, 0x00,
794       0x33, 0x00, 0x00, 0x00,
795       0x00, 0x00, 0x00, 0x00,
796       0x00, 0x4e, 0x00, 0x00,
797       0x8c, 0x4e, 0x00, 0x00,
798       0x09, 0x4e, 0x00, 0x00,
799       0x2e, 0x00, 0x00, 0x00,
800       0x21, 0x00, 0x02, 0x00 };
801    static const int32_t toUTF32LEOffs[]=
802     { 0x00, 0x00, 0x00, 0x00,
803       0x01, 0x01, 0x01, 0x01,
804       0x02, 0x02, 0x02, 0x02,
805       0x03, 0x03, 0x03, 0x03,
806       0x04, 0x04, 0x04, 0x04,
807       0x05, 0x05, 0x05, 0x05,
808       0x06, 0x06, 0x06, 0x06,
809       0x07, 0x07, 0x07, 0x07,
810       0x08, 0x08, 0x08, 0x08,
811       0x08, 0x08, 0x08, 0x08 };
812    static const int32_t fmUTF32LEOffs[] =
813     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
814
815
816
817
818/** Test chars #2 **/
819
820    /* Sahha [health],  slashed h's */
821    static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
822    static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
823
824    /* LMBCS */
825    static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
826    static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
827    static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
828    static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
829    /*********************************** START OF CODE finally *************/
830
831    gInBufferSize = insize;
832    gOutBufferSize = outsize;
833
834    log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
835
836
837    /*UTF-8*/
838    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
839        expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
840
841    log_verbose("Test surrogate behaviour for UTF8\n");
842    {
843        static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
844        static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
845                           0xf0, 0x90, 0x90, 0x81,
846                           0xef, 0xbf, 0xbd
847        };
848        static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
849        testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
850                         expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
851
852
853    }
854
855#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
856    /*ISO-2022*/
857    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
858        expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
859#endif
860
861    /*UTF16 LE*/
862    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
863        expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
864    /*UTF16 BE*/
865    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
866        expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
867    /*UTF32 LE*/
868    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
869        expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
870    /*UTF32 BE*/
871    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
872        expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
873
874    /*LATIN_1*/
875    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
876        expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
877
878#if !UCONFIG_NO_LEGACY_CONVERSION
879    /*EBCDIC_STATEFUL*/
880    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
881        expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
882
883    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
884        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
885
886    /*MBCS*/
887
888    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
889        expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
890    /*DBCS*/
891    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
892        expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
893    /*SBCS*/
894    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
895        expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
896    /*SBCS*/
897    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
898        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
899#endif
900
901
902/****/
903
904    /*UTF-8*/
905    testConvertToU(expectedUTF8, sizeof(expectedUTF8),
906        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
907#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
908    /*ISO-2022*/
909    testConvertToU(expectedISO2022, sizeof(expectedISO2022),
910        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
911#endif
912
913    /*UTF16 LE*/
914    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
915        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
916    /*UTF16 BE*/
917    testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
918        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
919    /*UTF32 LE*/
920    testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
921        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
922    /*UTF32 BE*/
923    testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
924        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
925
926#if !UCONFIG_NO_LEGACY_CONVERSION
927    /*EBCDIC_STATEFUL*/
928    testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
929            sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE);
930    /*MBCS*/
931    testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
932            sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE);
933#endif
934
935    /* Try it again to make sure it still works */
936    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
937        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
938
939#if !UCONFIG_NO_LEGACY_CONVERSION
940    testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
941        malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
942
943    testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
944        expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
945
946    /*LMBCS*/
947    testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
948        expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
949    testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
950        LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
951#endif
952
953    /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
954    {
955        /* encode directly set D and set O */
956        static const uint8_t utf7[] = {
957            /*
958                Hi Mom -+Jjo--!
959                A+ImIDkQ.
960                +-
961                +ZeVnLIqe-
962            */
963            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
964            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
965            0x2b, 0x2d,
966            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
967        };
968        static const UChar unicode[] = {
969            /*
970                Hi Mom -<WHITE SMILING FACE>-!
971                A<NOT IDENTICAL TO><ALPHA>.
972                +
973                [Japanese word "nihongo"]
974            */
975            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
976            0x41, 0x2262, 0x0391, 0x2e,
977            0x2b,
978            0x65e5, 0x672c, 0x8a9e
979        };
980        static const int32_t toUnicodeOffsets[] = {
981            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
982            15, 17, 19, 23,
983            24,
984            27, 29, 32
985        };
986        static const int32_t fromUnicodeOffsets[] = {
987            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
988            11, 12, 12, 12, 13, 13, 13, 13, 14,
989            15, 15,
990            16, 16, 16, 17, 17, 17, 18, 18, 18, 18
991        };
992
993        /* same but escaping set O (the exclamation mark) */
994        static const uint8_t utf7Restricted[] = {
995            /*
996                Hi Mom -+Jjo--+ACE-
997                A+ImIDkQ.
998                +-
999                +ZeVnLIqe-
1000            */
1001            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1002            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1003            0x2b, 0x2d,
1004            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1005        };
1006        static const int32_t toUnicodeOffsetsR[] = {
1007            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1008            19, 21, 23, 27,
1009            28,
1010            31, 33, 36
1011        };
1012        static const int32_t fromUnicodeOffsetsR[] = {
1013            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1014            11, 12, 12, 12, 13, 13, 13, 13, 14,
1015            15, 15,
1016            16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1017        };
1018
1019        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1020
1021        testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
1022
1023        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1024
1025        testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1026    }
1027
1028    /*
1029     * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1030     * modified according to RFC 2060,
1031     * and supplemented with the one example in RFC 2060 itself.
1032     */
1033    {
1034        static const uint8_t imap[] = {
1035            /*  Hi Mom -&Jjo--!
1036                A&ImIDkQ-.
1037                &-
1038                &ZeVnLIqe-
1039                \
1040                ~peter
1041                /mail
1042                /&ZeVnLIqe-
1043                /&U,BTFw-
1044            */
1045            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1046            0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1047            0x26, 0x2d,
1048            0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1049            0x5c,
1050            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1051            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1052            0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1053            0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1054        };
1055        static const UChar unicode[] = {
1056            /*  Hi Mom -<WHITE SMILING FACE>-!
1057                A<NOT IDENTICAL TO><ALPHA>.
1058                &
1059                [Japanese word "nihongo"]
1060                \
1061                ~peter
1062                /mail
1063                /<65e5, 672c, 8a9e>
1064                /<53f0, 5317>
1065            */
1066            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1067            0x41, 0x2262, 0x0391, 0x2e,
1068            0x26,
1069            0x65e5, 0x672c, 0x8a9e,
1070            0x5c,
1071            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1072            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1073            0x2f, 0x65e5, 0x672c, 0x8a9e,
1074            0x2f, 0x53f0, 0x5317
1075        };
1076        static const int32_t toUnicodeOffsets[] = {
1077            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1078            15, 17, 19, 24,
1079            25,
1080            28, 30, 33,
1081            37,
1082            38, 39, 40, 41, 42, 43,
1083            44, 45, 46, 47, 48,
1084            49, 51, 53, 56,
1085            60, 62, 64
1086        };
1087        static const int32_t fromUnicodeOffsets[] = {
1088            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1089            11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1090            15, 15,
1091            16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1092            19,
1093            20, 21, 22, 23, 24, 25,
1094            26, 27, 28, 29, 30,
1095            31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1096            35, 36, 36, 36, 37, 37, 37, 37, 37
1097        };
1098
1099        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1100
1101        testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1102    }
1103
1104    /* Test UTF-8 bad data handling*/
1105    {
1106        static const uint8_t utf8[]={
1107            0x61,
1108            0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1109            0x00,
1110            0x62,
1111            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1112            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1113            0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1114            0xdf, 0xbf,                     /* 7ff */
1115            0xbf,                           /* truncated tail */
1116            0xf4, 0x90, 0x80, 0x80,         /* 11FFFF */
1117            0x02
1118        };
1119
1120        static const uint16_t utf8Expected[]={
1121            0x0061,
1122            0xfffd,
1123            0x0000,
1124            0x0062,
1125            0xfffd,
1126            0xfffd,
1127            0xdbff, 0xdfff,
1128            0x07ff,
1129            0xfffd,
1130            0xfffd,
1131            0x0002
1132        };
1133
1134        static const int32_t utf8Offsets[]={
1135            0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1136        };
1137        testConvertToU(utf8, sizeof(utf8),
1138                       utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1139
1140    }
1141
1142    /* Test UTF-32BE bad data handling*/
1143    {
1144        static const uint8_t utf32[]={
1145            0x00, 0x00, 0x00, 0x61,
1146            0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1147            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1148            0x00, 0x00, 0x00, 0x62,
1149            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1150            0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1151            0x00, 0x00, 0x01, 0x62,
1152            0x00, 0x00, 0x02, 0x62
1153        };
1154        static const uint16_t utf32Expected[]={
1155            0x0061,
1156            0xfffd,         /* 0x110000 out of range */
1157            0xDBFF,         /* 0x10FFFF in range */
1158            0xDFFF,
1159            0x0062,
1160            0xfffd,         /* 0xffffffff out of range */
1161            0xfffd,         /* 0x7fffffff out of range */
1162            0x0162,
1163            0x0262
1164        };
1165        static const int32_t utf32Offsets[]={
1166            0, 4, 8, 8, 12, 16, 20, 24, 28
1167        };
1168        static const uint8_t utf32ExpectedBack[]={
1169            0x00, 0x00, 0x00, 0x61,
1170            0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1171            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1172            0x00, 0x00, 0x00, 0x62,
1173            0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1174            0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1175            0x00, 0x00, 0x01, 0x62,
1176            0x00, 0x00, 0x02, 0x62
1177        };
1178        static const int32_t utf32OffsetsBack[]={
1179            0,0,0,0,
1180            1,1,1,1,
1181            2,2,2,2,
1182            4,4,4,4,
1183            5,5,5,5,
1184            6,6,6,6,
1185            7,7,7,7,
1186            8,8,8,8
1187        };
1188
1189        testConvertToU(utf32, sizeof(utf32),
1190                       utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1191        testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1192            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1193    }
1194
1195    /* Test UTF-32LE bad data handling*/
1196    {
1197        static const uint8_t utf32[]={
1198            0x61, 0x00, 0x00, 0x00,
1199            0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1200            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1201            0x62, 0x00, 0x00, 0x00,
1202            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1203            0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1204            0x62, 0x01, 0x00, 0x00,
1205            0x62, 0x02, 0x00, 0x00,
1206        };
1207
1208        static const uint16_t utf32Expected[]={
1209            0x0061,
1210            0xfffd,         /* 0x110000 out of range */
1211            0xDBFF,         /* 0x10FFFF in range */
1212            0xDFFF,
1213            0x0062,
1214            0xfffd,         /* 0xffffffff out of range */
1215            0xfffd,         /* 0x7fffffff out of range */
1216            0x0162,
1217            0x0262
1218        };
1219        static const int32_t utf32Offsets[]={
1220            0, 4, 8, 8, 12, 16, 20, 24, 28
1221        };
1222        static const uint8_t utf32ExpectedBack[]={
1223            0x61, 0x00, 0x00, 0x00,
1224            0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1225            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1226            0x62, 0x00, 0x00, 0x00,
1227            0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1228            0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1229            0x62, 0x01, 0x00, 0x00,
1230            0x62, 0x02, 0x00, 0x00
1231        };
1232        static const int32_t utf32OffsetsBack[]={
1233            0,0,0,0,
1234            1,1,1,1,
1235            2,2,2,2,
1236            4,4,4,4,
1237            5,5,5,5,
1238            6,6,6,6,
1239            7,7,7,7,
1240            8,8,8,8
1241        };
1242        testConvertToU(utf32, sizeof(utf32),
1243            utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1244        testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1245            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1246    }
1247}
1248
1249static void TestCoverageMBCS(){
1250#if 0
1251    UErrorCode status = U_ZERO_ERROR;
1252    const char *directory = loadTestData(&status);
1253    char* tdpath = NULL;
1254    char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1255    int len = strlen(directory);
1256    char* index=NULL;
1257
1258    tdpath = (char*) malloc(sizeof(char) * (len * 2));
1259    uprv_strcpy(saveDirectory,u_getDataDirectory());
1260    log_verbose("Retrieved data directory %s \n",saveDirectory);
1261    uprv_strcpy(tdpath,directory);
1262    index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1263
1264    if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1265            *(index+1)=0;
1266    }
1267    u_setDataDirectory(tdpath);
1268    log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1269#endif
1270
1271    /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1272      which is test file for MBCS conversion with single-byte codepage data.*/
1273    {
1274
1275        /* MBCS with single byte codepage data test1.ucm*/
1276        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1277        const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1278        int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1279
1280        /*from Unicode*/
1281        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1282            expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1283    }
1284
1285    /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1286      which is test file for MBCS conversion with three-byte codepage data.*/
1287    {
1288
1289        /* MBCS with three byte codepage data test3.ucm*/
1290        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1291        const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1292        int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1293
1294        const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1295        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1296        int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1297
1298        /*from Unicode*/
1299        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1300            expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1301
1302        /*to Unicode*/
1303        testConvertToU(test3input, sizeof(test3input),
1304            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1305
1306    }
1307
1308    /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1309      which is test file for MBCS conversion with four-byte codepage data.*/
1310    {
1311
1312        /* MBCS with three byte codepage data test4.ucm*/
1313        static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1314        static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1315        static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1316
1317        static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1318        static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1319        static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1320
1321        /*from Unicode*/
1322        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1323            expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1324
1325        /*to Unicode*/
1326        testConvertToU(test4input, sizeof(test4input),
1327            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1328
1329    }
1330#if 0
1331    free(tdpath);
1332    /* restore the original data directory */
1333    log_verbose("Setting the data directory to %s \n", saveDirectory);
1334    u_setDataDirectory(saveDirectory);
1335    free(saveDirectory);
1336#endif
1337
1338}
1339
1340static void TestConverterType(const char *convName, UConverterType convType) {
1341    UConverter* myConverter;
1342    UErrorCode err = U_ZERO_ERROR;
1343
1344    myConverter = my_ucnv_open(convName, &err);
1345
1346    if (U_FAILURE(err)) {
1347        log_data_err("Failed to create an %s converter\n", convName);
1348        return;
1349    }
1350    else
1351    {
1352        if (ucnv_getType(myConverter)!=convType) {
1353            log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1354                convName, convType);
1355        }
1356        else {
1357            log_verbose("ucnv_getType %s ok\n", convName);
1358        }
1359    }
1360    ucnv_close(myConverter);
1361}
1362
1363static void TestConverterTypesAndStarters()
1364{
1365#if !UCONFIG_NO_LEGACY_CONVERSION
1366    UConverter* myConverter;
1367    UErrorCode err = U_ZERO_ERROR;
1368    UBool mystarters[256];
1369
1370/*    const UBool expectedKSCstarters[256] = {
1371        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1372        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1373        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1374        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1375        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1376        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1377        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1378        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1379        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1380        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1381        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385        FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1386        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1387        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1388        TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1389        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1390        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1391        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1392        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1393        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1394        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1395        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1396        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1397
1398
1399    log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1400
1401    myConverter = ucnv_open("ksc", &err);
1402    if (U_FAILURE(err)) {
1403      log_data_err("Failed to create an ibm-ksc converter\n");
1404      return;
1405    }
1406    else
1407    {
1408        if (ucnv_getType(myConverter)!=UCNV_MBCS)
1409            log_err("ucnv_getType Failed for ibm-949\n");
1410        else
1411            log_verbose("ucnv_getType ibm-949 ok\n");
1412
1413        if(myConverter!=NULL)
1414            ucnv_getStarters(myConverter, mystarters, &err);
1415
1416        /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1417          log_err("Failed ucnv_getStarters for ksc\n");
1418          else
1419          log_verbose("ucnv_getStarters ok\n");*/
1420
1421    }
1422    ucnv_close(myConverter);
1423
1424    TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1425    TestConverterType("ibm-878", UCNV_SBCS);
1426#endif
1427
1428    TestConverterType("iso-8859-1", UCNV_LATIN_1);
1429
1430    TestConverterType("ibm-1208", UCNV_UTF8);
1431
1432    TestConverterType("utf-8", UCNV_UTF8);
1433    TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1434    TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1435    TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1436    TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1437
1438#if !UCONFIG_NO_LEGACY_CONVERSION
1439
1440#if defined(U_ENABLE_GENERIC_ISO_2022)
1441    TestConverterType("iso-2022", UCNV_ISO_2022);
1442#endif
1443
1444    TestConverterType("hz", UCNV_HZ);
1445#endif
1446
1447    TestConverterType("scsu", UCNV_SCSU);
1448
1449#if !UCONFIG_NO_LEGACY_CONVERSION
1450    TestConverterType("x-iscii-de", UCNV_ISCII);
1451#endif
1452
1453    TestConverterType("ascii", UCNV_US_ASCII);
1454    TestConverterType("utf-7", UCNV_UTF7);
1455    TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1456    TestConverterType("bocu-1", UCNV_BOCU1);
1457}
1458
1459static void
1460TestAmbiguousConverter(UConverter *cnv) {
1461    static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1462    UChar outUnicode[20]={ 0, 0, 0, 0 };
1463
1464    const char *s;
1465    UChar *u;
1466    UErrorCode errorCode;
1467    UBool isAmbiguous;
1468
1469    /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1470    errorCode=U_ZERO_ERROR;
1471    s=inBytes;
1472    u=outUnicode;
1473    ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1474    if(U_FAILURE(errorCode)) {
1475        /* we do not care about general failures in this test; the input may just not be mappable */
1476        return;
1477    }
1478
1479    if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1480        /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1481        /* There are some encodings that are partially ASCII based,
1482        like the ISO-7 and GSM series of codepages, which we ignore. */
1483        return;
1484    }
1485
1486    isAmbiguous=ucnv_isAmbiguous(cnv);
1487
1488    /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1489    if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1490        log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1491            ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1492        return;
1493    }
1494
1495    if(outUnicode[2]!=0x5c) {
1496        /* needs fixup, fix it */
1497        ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1498        if(outUnicode[2]!=0x5c) {
1499            /* the fix failed */
1500            log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1501            return;
1502        }
1503    }
1504}
1505
1506static void TestAmbiguous()
1507{
1508    UErrorCode status = U_ZERO_ERROR;
1509    UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1510    static const char target[] = {
1511        /* "\\usr\\local\\share\\data\\icutest.txt" */
1512        0x5c, 0x75, 0x73, 0x72,
1513        0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1514        0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1515        0x5c, 0x64, 0x61, 0x74, 0x61,
1516        0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1517        0
1518    };
1519    UChar asciiResult[200], sjisResult[200];
1520    int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1521    const char *name;
1522
1523    /* enumerate all converters */
1524    status=U_ZERO_ERROR;
1525    for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1526        cnv=ucnv_open(name, &status);
1527        if(U_SUCCESS(status)) {
1528            TestAmbiguousConverter(cnv);
1529            ucnv_close(cnv);
1530        } else {
1531            log_err("error: unable to open available converter \"%s\"\n", name);
1532            status=U_ZERO_ERROR;
1533        }
1534    }
1535
1536#if !UCONFIG_NO_LEGACY_CONVERSION
1537    sjis_cnv = ucnv_open("ibm-943", &status);
1538    if (U_FAILURE(status))
1539    {
1540        log_data_err("Failed to create a SJIS converter\n");
1541        return;
1542    }
1543    ascii_cnv = ucnv_open("LATIN-1", &status);
1544    if (U_FAILURE(status))
1545    {
1546        log_data_err("Failed to create a LATIN-1 converter\n");
1547        ucnv_close(sjis_cnv);
1548        return;
1549    }
1550    /* convert target from SJIS to Unicode */
1551    sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1552    if (U_FAILURE(status))
1553    {
1554        log_err("Failed to convert the SJIS string.\n");
1555        ucnv_close(sjis_cnv);
1556        ucnv_close(ascii_cnv);
1557        return;
1558    }
1559    /* convert target from Latin-1 to Unicode */
1560    /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1561    if (U_FAILURE(status))
1562    {
1563        log_err("Failed to convert the Latin-1 string.\n");
1564        ucnv_close(sjis_cnv);
1565        ucnv_close(ascii_cnv);
1566        return;
1567    }
1568    if (!ucnv_isAmbiguous(sjis_cnv))
1569    {
1570        log_err("SJIS converter should contain ambiguous character mappings.\n");
1571        ucnv_close(sjis_cnv);
1572        ucnv_close(ascii_cnv);
1573        return;
1574    }
1575    if (u_strcmp(sjisResult, asciiResult) == 0)
1576    {
1577        log_err("File separators for SJIS don't need to be fixed.\n");
1578    }
1579    ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1580    if (u_strcmp(sjisResult, asciiResult) != 0)
1581    {
1582        log_err("Fixing file separator for SJIS failed.\n");
1583    }
1584    ucnv_close(sjis_cnv);
1585    ucnv_close(ascii_cnv);
1586#endif
1587}
1588
1589static void
1590TestSignatureDetection(){
1591    /* with null terminated strings */
1592    {
1593        static const char* data[] = {
1594                "\xFE\xFF\x00\x00",     /* UTF-16BE */
1595                "\xFF\xFE\x00\x00",     /* UTF-16LE */
1596                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1597                "\x0E\xFE\xFF\x00",     /* SCSU     */
1598
1599                "\xFE\xFF",             /* UTF-16BE */
1600                "\xFF\xFE",             /* UTF-16LE */
1601                "\xEF\xBB\xBF",         /* UTF-8    */
1602                "\x0E\xFE\xFF",         /* SCSU     */
1603
1604                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1605                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1606                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1607                "\x0E\xFE\xFF\x41",     /* SCSU     */
1608
1609                "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1610                "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1611                "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1612                "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1613                "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1614
1615                "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1616        };
1617        static const char* expected[] = {
1618                "UTF-16BE",
1619                "UTF-16LE",
1620                "UTF-8",
1621                "SCSU",
1622
1623                "UTF-16BE",
1624                "UTF-16LE",
1625                "UTF-8",
1626                "SCSU",
1627
1628                "UTF-16BE",
1629                "UTF-16LE",
1630                "UTF-8",
1631                "SCSU",
1632
1633                "UTF-7",
1634                "UTF-7",
1635                "UTF-7",
1636                "UTF-7",
1637                "UTF-7",
1638                "UTF-EBCDIC"
1639        };
1640        static const int32_t expectedLength[] ={
1641            2,
1642            2,
1643            3,
1644            3,
1645
1646            2,
1647            2,
1648            3,
1649            3,
1650
1651            2,
1652            2,
1653            3,
1654            3,
1655
1656            5,
1657            4,
1658            4,
1659            4,
1660            4,
1661            4
1662        };
1663        int i=0;
1664        UErrorCode err;
1665        int32_t signatureLength = -1;
1666        const char* source = NULL;
1667        const char* enc = NULL;
1668        for( ; i<sizeof(data)/sizeof(char*); i++){
1669            err = U_ZERO_ERROR;
1670            source = data[i];
1671            enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1672            if(U_FAILURE(err)){
1673                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1674                continue;
1675            }
1676            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1677                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1678                continue;
1679            }
1680            if(signatureLength != expectedLength[i]){
1681                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1682            }
1683        }
1684    }
1685    {
1686        static const char* data[] = {
1687                "\xFE\xFF\x00",         /* UTF-16BE */
1688                "\xFF\xFE\x00",         /* UTF-16LE */
1689                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1690                "\x0E\xFE\xFF\x00",     /* SCSU     */
1691                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1692                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1693                "\xFE\xFF",             /* UTF-16BE */
1694                "\xFF\xFE",             /* UTF-16LE */
1695                "\xEF\xBB\xBF",         /* UTF-8    */
1696                "\x0E\xFE\xFF",         /* SCSU     */
1697                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1698                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1699                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1700                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1701                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1702                "\x0E\xFE\xFF\x41",     /* SCSU     */
1703                "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1704                "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1705                "\xFB\xEE\x28",         /* BOCU-1   */
1706                "\xFF\x41\x42"          /* NULL     */
1707        };
1708        static const int len[] = {
1709            3,
1710            3,
1711            4,
1712            4,
1713            4,
1714            4,
1715            2,
1716            2,
1717            3,
1718            3,
1719            4,
1720            4,
1721            4,
1722            4,
1723            4,
1724            4,
1725            5,
1726            5,
1727            3,
1728            3
1729        };
1730
1731        static const char* expected[] = {
1732                "UTF-16BE",
1733                "UTF-16LE",
1734                "UTF-8",
1735                "SCSU",
1736                "UTF-32BE",
1737                "UTF-32LE",
1738                "UTF-16BE",
1739                "UTF-16LE",
1740                "UTF-8",
1741                "SCSU",
1742                "UTF-32BE",
1743                "UTF-32LE",
1744                "UTF-16BE",
1745                "UTF-16LE",
1746                "UTF-8",
1747                "SCSU",
1748                "UTF-32BE",
1749                "UTF-32LE",
1750                "BOCU-1",
1751                NULL
1752        };
1753        static const int32_t expectedLength[] ={
1754            2,
1755            2,
1756            3,
1757            3,
1758            4,
1759            4,
1760            2,
1761            2,
1762            3,
1763            3,
1764            4,
1765            4,
1766            2,
1767            2,
1768            3,
1769            3,
1770            4,
1771            4,
1772            3,
1773            0
1774        };
1775        int i=0;
1776        UErrorCode err;
1777        int32_t signatureLength = -1;
1778        int32_t sourceLength=-1;
1779        const char* source = NULL;
1780        const char* enc = NULL;
1781        for( ; i<sizeof(data)/sizeof(char*); i++){
1782            err = U_ZERO_ERROR;
1783            source = data[i];
1784            sourceLength = len[i];
1785            enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1786            if(U_FAILURE(err)){
1787                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1788                continue;
1789            }
1790            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1791                if(expected[i] !=NULL){
1792                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1793                 continue;
1794                }
1795            }
1796            if(signatureLength != expectedLength[i]){
1797                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1798            }
1799        }
1800    }
1801}
1802
1803static void TestUTF7() {
1804    /* test input */
1805    static const uint8_t in[]={
1806        /* H - +Jjo- - ! +- +2AHcAQ */
1807        0x48,
1808        0x2d,
1809        0x2b, 0x4a, 0x6a, 0x6f,
1810        0x2d, 0x2d,
1811        0x21,
1812        0x2b, 0x2d,
1813        0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1814    };
1815
1816    /* expected test results */
1817    static const int32_t results[]={
1818        /* number of bytes read, code point */
1819        1, 0x48,
1820        1, 0x2d,
1821        4, 0x263a, /* <WHITE SMILING FACE> */
1822        2, 0x2d,
1823        1, 0x21,
1824        2, 0x2b,
1825        7, 0x10401
1826    };
1827
1828    const char *cnvName;
1829    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1830    UErrorCode errorCode=U_ZERO_ERROR;
1831    UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1832    if(U_FAILURE(errorCode)) {
1833        log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1834        return;
1835    }
1836    TestNextUChar(cnv, source, limit, results, "UTF-7");
1837    /* Test the condition when source >= sourceLimit */
1838    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1839    cnvName = ucnv_getName(cnv, &errorCode);
1840    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1841        log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1842    }
1843    ucnv_close(cnv);
1844}
1845
1846static void TestIMAP() {
1847    /* test input */
1848    static const uint8_t in[]={
1849        /* H - &Jjo- - ! &- &2AHcAQ- \ */
1850        0x48,
1851        0x2d,
1852        0x26, 0x4a, 0x6a, 0x6f,
1853        0x2d, 0x2d,
1854        0x21,
1855        0x26, 0x2d,
1856        0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1857    };
1858
1859    /* expected test results */
1860    static const int32_t results[]={
1861        /* number of bytes read, code point */
1862        1, 0x48,
1863        1, 0x2d,
1864        4, 0x263a, /* <WHITE SMILING FACE> */
1865        2, 0x2d,
1866        1, 0x21,
1867        2, 0x26,
1868        7, 0x10401
1869    };
1870
1871    const char *cnvName;
1872    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1873    UErrorCode errorCode=U_ZERO_ERROR;
1874    UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1875    if(U_FAILURE(errorCode)) {
1876        log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1877        return;
1878    }
1879    TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1880    /* Test the condition when source >= sourceLimit */
1881    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1882    cnvName = ucnv_getName(cnv, &errorCode);
1883    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1884        log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1885    }
1886    ucnv_close(cnv);
1887}
1888
1889static void TestUTF8() {
1890    /* test input */
1891    static const uint8_t in[]={
1892        0x61,
1893        0xc2, 0x80,
1894        0xe0, 0xa0, 0x80,
1895        0xf0, 0x90, 0x80, 0x80,
1896        0xf4, 0x84, 0x8c, 0xa1,
1897        0xf0, 0x90, 0x90, 0x81
1898    };
1899
1900    /* expected test results */
1901    static const int32_t results[]={
1902        /* number of bytes read, code point */
1903        1, 0x61,
1904        2, 0x80,
1905        3, 0x800,
1906        4, 0x10000,
1907        4, 0x104321,
1908        4, 0x10401
1909    };
1910
1911    /* error test input */
1912    static const uint8_t in2[]={
1913        0x61,
1914        0xc0, 0x80,                     /* illegal non-shortest form */
1915        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1916        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1917        0xc0, 0xc0,                     /* illegal trail byte */
1918        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1919        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1920        0xfe,                           /* illegal byte altogether */
1921        0x62
1922    };
1923
1924    /* expected error test results */
1925    static const int32_t results2[]={
1926        /* number of bytes read, code point */
1927        1, 0x61,
1928        22, 0x62
1929    };
1930
1931    UConverterToUCallback cb;
1932    const void *p;
1933
1934    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1935    UErrorCode errorCode=U_ZERO_ERROR;
1936    UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1937    if(U_FAILURE(errorCode)) {
1938        log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1939        return;
1940    }
1941    TestNextUChar(cnv, source, limit, results, "UTF-8");
1942    /* Test the condition when source >= sourceLimit */
1943    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1944
1945    /* test error behavior with a skip callback */
1946    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1947    source=(const char *)in2;
1948    limit=(const char *)(in2+sizeof(in2));
1949    TestNextUChar(cnv, source, limit, results2, "UTF-8");
1950
1951    ucnv_close(cnv);
1952}
1953
1954static void TestCESU8() {
1955    /* test input */
1956    static const uint8_t in[]={
1957        0x61,
1958        0xc2, 0x80,
1959        0xe0, 0xa0, 0x80,
1960        0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1961        0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1962        0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1963        0xef, 0xbf, 0xbc
1964    };
1965
1966    /* expected test results */
1967    static const int32_t results[]={
1968        /* number of bytes read, code point */
1969        1, 0x61,
1970        2, 0x80,
1971        3, 0x800,
1972        6, 0x10000,
1973        3, 0xdc01,
1974        -1,0xd802,  /* may read 3 or 6 bytes */
1975        -1,0x10ffff,/* may read 0 or 3 bytes */
1976        3, 0xfffc
1977    };
1978
1979    /* error test input */
1980    static const uint8_t in2[]={
1981        0x61,
1982        0xc0, 0x80,                     /* illegal non-shortest form */
1983        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1984        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1985        0xc0, 0xc0,                     /* illegal trail byte */
1986        0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
1987        0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
1988        0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
1989        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1990        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1991        0xfe,                           /* illegal byte altogether */
1992        0x62
1993    };
1994
1995    /* expected error test results */
1996    static const int32_t results2[]={
1997        /* number of bytes read, code point */
1998        1, 0x61,
1999        34, 0x62
2000    };
2001
2002    UConverterToUCallback cb;
2003    const void *p;
2004
2005    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2006    UErrorCode errorCode=U_ZERO_ERROR;
2007    UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2008    if(U_FAILURE(errorCode)) {
2009        log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2010        return;
2011    }
2012    TestNextUChar(cnv, source, limit, results, "CESU-8");
2013    /* Test the condition when source >= sourceLimit */
2014    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2015
2016    /* test error behavior with a skip callback */
2017    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2018    source=(const char *)in2;
2019    limit=(const char *)(in2+sizeof(in2));
2020    TestNextUChar(cnv, source, limit, results2, "CESU-8");
2021
2022    ucnv_close(cnv);
2023}
2024
2025static void TestUTF16() {
2026    /* test input */
2027    static const uint8_t in1[]={
2028        0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2029    };
2030    static const uint8_t in2[]={
2031        0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2032    };
2033    static const uint8_t in3[]={
2034        0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2035    };
2036
2037    /* expected test results */
2038    static const int32_t results1[]={
2039        /* number of bytes read, code point */
2040        4, 0x4e00,
2041        2, 0xfeff
2042    };
2043    static const int32_t results2[]={
2044        /* number of bytes read, code point */
2045        4, 0x004e,
2046        2, 0xfffe
2047    };
2048    static const int32_t results3[]={
2049        /* number of bytes read, code point */
2050        2, 0xfefe,
2051        2, 0x4e00,
2052        2, 0xfeff,
2053        4, 0x20001
2054    };
2055
2056    const char *source, *limit;
2057
2058    UErrorCode errorCode=U_ZERO_ERROR;
2059    UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2060    if(U_FAILURE(errorCode)) {
2061        log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2062        return;
2063    }
2064
2065    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2066    TestNextUChar(cnv, source, limit, results1, "UTF-16");
2067
2068    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2069    ucnv_resetToUnicode(cnv);
2070    TestNextUChar(cnv, source, limit, results2, "UTF-16");
2071
2072    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2073    ucnv_resetToUnicode(cnv);
2074    TestNextUChar(cnv, source, limit, results3, "UTF-16");
2075
2076    /* Test the condition when source >= sourceLimit */
2077    ucnv_resetToUnicode(cnv);
2078    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2079
2080    ucnv_close(cnv);
2081}
2082
2083static void TestUTF16BE() {
2084    /* test input */
2085    static const uint8_t in[]={
2086        0x00, 0x61,
2087        0x00, 0xc0,
2088        0x00, 0x31,
2089        0x00, 0xf4,
2090        0xce, 0xfe,
2091        0xd8, 0x01, 0xdc, 0x01
2092    };
2093
2094    /* expected test results */
2095    static const int32_t results[]={
2096        /* number of bytes read, code point */
2097        2, 0x61,
2098        2, 0xc0,
2099        2, 0x31,
2100        2, 0xf4,
2101        2, 0xcefe,
2102        4, 0x10401
2103    };
2104
2105    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2106    UErrorCode errorCode=U_ZERO_ERROR;
2107    UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2108    if(U_FAILURE(errorCode)) {
2109        log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2110        return;
2111    }
2112    TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2113    /* Test the condition when source >= sourceLimit */
2114    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2115    /*Test for the condition where there is an invalid character*/
2116    {
2117        static const uint8_t source2[]={0x61};
2118        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2119        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2120    }
2121#if 0
2122    /*
2123     * Test disabled because currently the UTF-16BE/LE converters are supposed
2124     * to not set errors for unpaired surrogates.
2125     * This may change with
2126     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2127     */
2128
2129    /*Test for the condition where there is a surrogate pair*/
2130    {
2131        const uint8_t source2[]={0xd8, 0x01};
2132        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2133    }
2134#endif
2135    ucnv_close(cnv);
2136}
2137
2138static void
2139TestUTF16LE() {
2140    /* test input */
2141    static const uint8_t in[]={
2142        0x61, 0x00,
2143        0x31, 0x00,
2144        0x4e, 0x2e,
2145        0x4e, 0x00,
2146        0x01, 0xd8, 0x01, 0xdc
2147    };
2148
2149    /* expected test results */
2150    static const int32_t results[]={
2151        /* number of bytes read, code point */
2152        2, 0x61,
2153        2, 0x31,
2154        2, 0x2e4e,
2155        2, 0x4e,
2156        4, 0x10401
2157    };
2158
2159    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2160    UErrorCode errorCode=U_ZERO_ERROR;
2161    UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2162    if(U_FAILURE(errorCode)) {
2163        log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2164        return;
2165    }
2166    TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2167    /* Test the condition when source >= sourceLimit */
2168    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2169    /*Test for the condition where there is an invalid character*/
2170    {
2171        static const uint8_t source2[]={0x61};
2172        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2173        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2174    }
2175#if 0
2176    /*
2177     * Test disabled because currently the UTF-16BE/LE converters are supposed
2178     * to not set errors for unpaired surrogates.
2179     * This may change with
2180     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2181     */
2182
2183    /*Test for the condition where there is a surrogate character*/
2184    {
2185        static const uint8_t source2[]={0x01, 0xd8};
2186        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2187    }
2188#endif
2189
2190    ucnv_close(cnv);
2191}
2192
2193static void TestUTF32() {
2194    /* test input */
2195    static const uint8_t in1[]={
2196        0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2197    };
2198    static const uint8_t in2[]={
2199        0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2200    };
2201    static const uint8_t in3[]={
2202        0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2203    };
2204
2205    /* expected test results */
2206    static const int32_t results1[]={
2207        /* number of bytes read, code point */
2208        8, 0x100f00,
2209        4, 0xfeff
2210    };
2211    static const int32_t results2[]={
2212        /* number of bytes read, code point */
2213        8, 0x0f1000,
2214        4, 0xfffe
2215    };
2216    static const int32_t results3[]={
2217        /* number of bytes read, code point */
2218        4, 0xfefe,
2219        4, 0x100f00,
2220        4, 0xfffd, /* unmatched surrogate */
2221        4, 0xfffd  /* unmatched surrogate */
2222    };
2223
2224    const char *source, *limit;
2225
2226    UErrorCode errorCode=U_ZERO_ERROR;
2227    UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2228    if(U_FAILURE(errorCode)) {
2229        log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2230        return;
2231    }
2232
2233    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2234    TestNextUChar(cnv, source, limit, results1, "UTF-32");
2235
2236    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2237    ucnv_resetToUnicode(cnv);
2238    TestNextUChar(cnv, source, limit, results2, "UTF-32");
2239
2240    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2241    ucnv_resetToUnicode(cnv);
2242    TestNextUChar(cnv, source, limit, results3, "UTF-32");
2243
2244    /* Test the condition when source >= sourceLimit */
2245    ucnv_resetToUnicode(cnv);
2246    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2247
2248    ucnv_close(cnv);
2249}
2250
2251static void
2252TestUTF32BE() {
2253    /* test input */
2254    static const uint8_t in[]={
2255        0x00, 0x00, 0x00, 0x61,
2256        0x00, 0x00, 0x30, 0x61,
2257        0x00, 0x00, 0xdc, 0x00,
2258        0x00, 0x00, 0xd8, 0x00,
2259        0x00, 0x00, 0xdf, 0xff,
2260        0x00, 0x00, 0xff, 0xfe,
2261        0x00, 0x10, 0xab, 0xcd,
2262        0x00, 0x10, 0xff, 0xff
2263    };
2264
2265    /* expected test results */
2266    static const int32_t results[]={
2267        /* number of bytes read, code point */
2268        4, 0x61,
2269        4, 0x3061,
2270        4, 0xfffd,
2271        4, 0xfffd,
2272        4, 0xfffd,
2273        4, 0xfffe,
2274        4, 0x10abcd,
2275        4, 0x10ffff
2276    };
2277
2278    /* error test input */
2279    static const uint8_t in2[]={
2280        0x00, 0x00, 0x00, 0x61,
2281        0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2282        0x00, 0x00, 0x00, 0x62,
2283        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2284        0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2285        0x00, 0x00, 0x01, 0x62,
2286        0x00, 0x00, 0x02, 0x62
2287    };
2288
2289    /* expected error test results */
2290    static const int32_t results2[]={
2291        /* number of bytes read, code point */
2292        4,  0x61,
2293        8,  0x62,
2294        12, 0x162,
2295        4,  0x262
2296    };
2297
2298    UConverterToUCallback cb;
2299    const void *p;
2300
2301    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2302    UErrorCode errorCode=U_ZERO_ERROR;
2303    UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2304    if(U_FAILURE(errorCode)) {
2305        log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2306        return;
2307    }
2308    TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2309
2310    /* Test the condition when source >= sourceLimit */
2311    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2312
2313    /* test error behavior with a skip callback */
2314    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2315    source=(const char *)in2;
2316    limit=(const char *)(in2+sizeof(in2));
2317    TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2318
2319    ucnv_close(cnv);
2320}
2321
2322static void
2323TestUTF32LE() {
2324    /* test input */
2325    static const uint8_t in[]={
2326        0x61, 0x00, 0x00, 0x00,
2327        0x61, 0x30, 0x00, 0x00,
2328        0x00, 0xdc, 0x00, 0x00,
2329        0x00, 0xd8, 0x00, 0x00,
2330        0xff, 0xdf, 0x00, 0x00,
2331        0xfe, 0xff, 0x00, 0x00,
2332        0xcd, 0xab, 0x10, 0x00,
2333        0xff, 0xff, 0x10, 0x00
2334    };
2335
2336    /* expected test results */
2337    static const int32_t results[]={
2338        /* number of bytes read, code point */
2339        4, 0x61,
2340        4, 0x3061,
2341        4, 0xfffd,
2342        4, 0xfffd,
2343        4, 0xfffd,
2344        4, 0xfffe,
2345        4, 0x10abcd,
2346        4, 0x10ffff
2347    };
2348
2349    /* error test input */
2350    static const uint8_t in2[]={
2351        0x61, 0x00, 0x00, 0x00,
2352        0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2353        0x62, 0x00, 0x00, 0x00,
2354        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2355        0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2356        0x62, 0x01, 0x00, 0x00,
2357        0x62, 0x02, 0x00, 0x00,
2358    };
2359
2360    /* expected error test results */
2361    static const int32_t results2[]={
2362        /* number of bytes read, code point */
2363        4,  0x61,
2364        8,  0x62,
2365        12, 0x162,
2366        4,  0x262,
2367    };
2368
2369    UConverterToUCallback cb;
2370    const void *p;
2371
2372    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2373    UErrorCode errorCode=U_ZERO_ERROR;
2374    UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2375    if(U_FAILURE(errorCode)) {
2376        log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2377        return;
2378    }
2379    TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2380
2381    /* Test the condition when source >= sourceLimit */
2382    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2383
2384    /* test error behavior with a skip callback */
2385    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2386    source=(const char *)in2;
2387    limit=(const char *)(in2+sizeof(in2));
2388    TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2389
2390    ucnv_close(cnv);
2391}
2392
2393static void
2394TestLATIN1() {
2395    /* test input */
2396    static const uint8_t in[]={
2397       0x61,
2398       0x31,
2399       0x32,
2400       0xc0,
2401       0xf0,
2402       0xf4,
2403    };
2404
2405    /* expected test results */
2406    static const int32_t results[]={
2407        /* number of bytes read, code point */
2408        1, 0x61,
2409        1, 0x31,
2410        1, 0x32,
2411        1, 0xc0,
2412        1, 0xf0,
2413        1, 0xf4,
2414    };
2415    static const uint16_t in1[] = {
2416        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2417        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2418        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2419        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2420        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2421        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2422        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2423        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2424        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2425        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2426        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2427        0xcb, 0x82
2428    };
2429    static const uint8_t out1[] = {
2430        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2431        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2432        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2433        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2434        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2435        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2436        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2437        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2438        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2439        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2440        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2441        0xcb, 0x82
2442    };
2443    static const uint16_t in2[]={
2444        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2445        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2446        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2447        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2448        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2449        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2450        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2451        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2452        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2453        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2454        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2455        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2456        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2457        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2458        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2459        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2460        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2461        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2462        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2463        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2464        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2465        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2466        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2467        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2468        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2469        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2470        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2471        0x37, 0x20, 0x2A, 0x2F,
2472    };
2473    static const unsigned char out2[]={
2474        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2475        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2476        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2477        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2478        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2479        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2480        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2481        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2482        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2483        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2484        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2485        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2486        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2487        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2488        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2489        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2490        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2491        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2492        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2493        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2494        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2495        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2496        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2497        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2498        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2499        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2500        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2501        0x37, 0x20, 0x2A, 0x2F,
2502    };
2503    const char *source=(const char *)in;
2504    const char *limit=(const char *)in+sizeof(in);
2505
2506    UErrorCode errorCode=U_ZERO_ERROR;
2507    UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2508    if(U_FAILURE(errorCode)) {
2509        log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2510        return;
2511    }
2512    TestNextUChar(cnv, source, limit, results, "LATIN_1");
2513    /* Test the condition when source >= sourceLimit */
2514    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2515    TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2516    TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2517
2518    ucnv_close(cnv);
2519}
2520
2521static void
2522TestSBCS() {
2523    /* test input */
2524    static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2525    /* expected test results */
2526    static const int32_t results[]={
2527        /* number of bytes read, code point */
2528        1, 0x61,
2529        1, 0xbf,
2530        1, 0xc4,
2531        1, 0x2021,
2532        1, 0xf8ff,
2533        1, 0x00d9
2534    };
2535
2536    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2537    UErrorCode errorCode=U_ZERO_ERROR;
2538    UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2539    if(U_FAILURE(errorCode)) {
2540        log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2541        return;
2542    }
2543    TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2544    /* Test the condition when source >= sourceLimit */
2545    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2546    /*Test for Illegal character */ /*
2547    {
2548    static const uint8_t input1[]={ 0xA1 };
2549    const char* illegalsource=(const char*)input1;
2550    TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2551    }
2552   */
2553    ucnv_close(cnv);
2554}
2555
2556static void
2557TestDBCS() {
2558    /* test input */
2559    static const uint8_t in[]={
2560        0x44, 0x6a,
2561        0xc4, 0x9c,
2562        0x7a, 0x74,
2563        0x46, 0xab,
2564        0x42, 0x5b,
2565
2566    };
2567
2568    /* expected test results */
2569    static const int32_t results[]={
2570        /* number of bytes read, code point */
2571        2, 0x00a7,
2572        2, 0xe1d2,
2573        2, 0x6962,
2574        2, 0xf842,
2575        2, 0xffe5,
2576    };
2577
2578    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2579    UErrorCode errorCode=U_ZERO_ERROR;
2580
2581    UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2582    if(U_FAILURE(errorCode)) {
2583        log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2584        return;
2585    }
2586    TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2587    /* Test the condition when source >= sourceLimit */
2588    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2589    /*Test for the condition where there is an invalid character*/
2590    {
2591        static const uint8_t source2[]={0x1a, 0x1b};
2592        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2593    }
2594    /*Test for the condition where we have a truncated char*/
2595    {
2596        static const uint8_t source1[]={0xc4};
2597        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2598        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2599    }
2600    ucnv_close(cnv);
2601}
2602
2603static void
2604TestMBCS() {
2605    /* test input */
2606    static const uint8_t in[]={
2607        0x01,
2608        0xa6, 0xa3,
2609        0x00,
2610        0xa6, 0xa1,
2611        0x08,
2612        0xc2, 0x76,
2613        0xc2, 0x78,
2614
2615    };
2616
2617    /* expected test results */
2618    static const int32_t results[]={
2619        /* number of bytes read, code point */
2620        1, 0x0001,
2621        2, 0x250c,
2622        1, 0x0000,
2623        2, 0x2500,
2624        1, 0x0008,
2625        2, 0xd60c,
2626        2, 0xd60e,
2627    };
2628
2629    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2630    UErrorCode errorCode=U_ZERO_ERROR;
2631
2632    UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2633    if(U_FAILURE(errorCode)) {
2634        log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2635        return;
2636    }
2637    TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2638    /* Test the condition when source >= sourceLimit */
2639    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2640    /*Test for the condition where there is an invalid character*/
2641    {
2642        static const uint8_t source2[]={0xa1, 0x80};
2643        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2644    }
2645    /*Test for the condition where we have a truncated char*/
2646    {
2647        static const uint8_t source1[]={0xc4};
2648        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2649        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2650    }
2651    ucnv_close(cnv);
2652
2653}
2654
2655#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2656static void
2657TestICCRunout() {
2658/*    { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2659
2660    const char *cnvName = "ibm-1363";
2661    UErrorCode status = U_ZERO_ERROR;
2662    const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2663    /* UChar   expectUData[] = { 0x00a1, 0x001a }; */
2664    const char *source = sourceData;
2665    const char *sourceLim = sourceData+sizeof(sourceData);
2666    UChar c1, c2, c3;
2667    UConverter *cnv=ucnv_open(cnvName, &status);
2668    if(U_FAILURE(status)) {
2669        log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2670	return;
2671    }
2672
2673#if 0
2674    {
2675    UChar   targetBuf[256];
2676    UChar   *target = targetBuf;
2677    UChar   *targetLim = target+256;
2678    ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2679
2680    log_info("After convert: target@%d, source@%d, status%s\n",
2681	     target-targetBuf, source-sourceData, u_errorName(status));
2682
2683    if(U_FAILURE(status)) {
2684	log_err("Failed to convert: %s\n", u_errorName(status));
2685    } else {
2686
2687    }
2688    }
2689#endif
2690
2691    c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2692    log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2693
2694    c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2695    log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2696
2697    c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2698    log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2699
2700    if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2701	log_verbose("OK\n");
2702    } else {
2703	log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2704    }
2705
2706    ucnv_close(cnv);
2707
2708}
2709#endif
2710
2711#ifdef U_ENABLE_GENERIC_ISO_2022
2712
2713static void
2714TestISO_2022() {
2715    /* test input */
2716    static const uint8_t in[]={
2717        0x1b, 0x25, 0x42,
2718        0x31,
2719        0x32,
2720        0x61,
2721        0xc2, 0x80,
2722        0xe0, 0xa0, 0x80,
2723        0xf0, 0x90, 0x80, 0x80
2724    };
2725
2726
2727
2728    /* expected test results */
2729    static const int32_t results[]={
2730        /* number of bytes read, code point */
2731        4, 0x0031,  /* 4 bytes including the escape sequence */
2732        1, 0x0032,
2733        1, 0x61,
2734        2, 0x80,
2735        3, 0x800,
2736        4, 0x10000
2737    };
2738
2739    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2740    UErrorCode errorCode=U_ZERO_ERROR;
2741    UConverter *cnv;
2742
2743    cnv=ucnv_open("ISO_2022", &errorCode);
2744    if(U_FAILURE(errorCode)) {
2745        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2746        return;
2747    }
2748    TestNextUChar(cnv, source, limit, results, "ISO_2022");
2749
2750    /* Test the condition when source >= sourceLimit */
2751    TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2752    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2753    /*Test for the condition where we have a truncated char*/
2754    {
2755        static const uint8_t source1[]={0xc4};
2756        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2757        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2758    }
2759    /*Test for the condition where there is an invalid character*/
2760    {
2761        static const uint8_t source2[]={0xa1, 0x01};
2762        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2763    }
2764    ucnv_close(cnv);
2765}
2766
2767#endif
2768
2769static void
2770TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2771    const UChar* uSource;
2772    const UChar* uSourceLimit;
2773    const char* cSource;
2774    const char* cSourceLimit;
2775    UChar *uTargetLimit =NULL;
2776    UChar *uTarget;
2777    char *cTarget;
2778    const char *cTargetLimit;
2779    char *cBuf;
2780    UChar *uBuf; /*,*test;*/
2781    int32_t uBufSize = 120;
2782    int len=0;
2783    int i=2;
2784    UErrorCode errorCode=U_ZERO_ERROR;
2785    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2786    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2787    ucnv_reset(cnv);
2788    for(;--i>0; ){
2789        uSource = (UChar*) source;
2790        uSourceLimit=(const UChar*)sourceLimit;
2791        cTarget = cBuf;
2792        uTarget = uBuf;
2793        cSource = cBuf;
2794        cTargetLimit = cBuf;
2795        uTargetLimit = uBuf;
2796
2797        do{
2798
2799            cTargetLimit = cTargetLimit+ i;
2800            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2801            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2802               errorCode=U_ZERO_ERROR;
2803                continue;
2804            }
2805
2806            if(U_FAILURE(errorCode)){
2807                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2808                return;
2809            }
2810
2811        }while (uSource<uSourceLimit);
2812
2813        cSourceLimit =cTarget;
2814        do{
2815            uTargetLimit=uTargetLimit+i;
2816            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2817            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2818               errorCode=U_ZERO_ERROR;
2819                continue;
2820            }
2821            if(U_FAILURE(errorCode)){
2822                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2823                    return;
2824            }
2825        }while(cSource<cSourceLimit);
2826
2827        uSource = source;
2828        /*test =uBuf;*/
2829        for(len=0;len<(int)(source - sourceLimit);len++){
2830            if(uBuf[len]!=uSource[len]){
2831                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2832            }
2833        }
2834    }
2835    free(uBuf);
2836    free(cBuf);
2837}
2838/* Test for Jitterbug 778 */
2839static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2840    const UChar* uSource;
2841    const UChar* uSourceLimit;
2842    const char* cSource;
2843    UChar *uTargetLimit =NULL;
2844    UChar *uTarget;
2845    char *cTarget;
2846    const char *cTargetLimit;
2847    char *cBuf;
2848    UChar *uBuf,*test;
2849    int32_t uBufSize = 120;
2850    int numCharsInTarget=0;
2851    UErrorCode errorCode=U_ZERO_ERROR;
2852    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2853    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2854    uSource = source;
2855    uSourceLimit=sourceLimit;
2856    cTarget = cBuf;
2857    cTargetLimit = cBuf +uBufSize*5;
2858    uTarget = uBuf;
2859    uTargetLimit = uBuf+ uBufSize*5;
2860    ucnv_reset(cnv);
2861    numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2862    if(U_FAILURE(errorCode)){
2863        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2864        return;
2865    }
2866    cSource = cBuf;
2867    test =uBuf;
2868    ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2869    if(U_FAILURE(errorCode)){
2870        log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2871        return;
2872    }
2873    uSource = source;
2874    while(uSource<uSourceLimit){
2875        if(*test!=*uSource){
2876
2877            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2878        }
2879        uSource++;
2880        test++;
2881    }
2882    free(uBuf);
2883    free(cBuf);
2884}
2885
2886static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2887    const UChar* uSource;
2888    const UChar* uSourceLimit;
2889    const char* cSource;
2890    const char* cSourceLimit;
2891    UChar *uTargetLimit =NULL;
2892    UChar *uTarget;
2893    char *cTarget;
2894    const char *cTargetLimit;
2895    char *cBuf;
2896    UChar *uBuf; /*,*test;*/
2897    int32_t uBufSize = 120;
2898    int len=0;
2899    int i=2;
2900    const UChar *temp = sourceLimit;
2901    UErrorCode errorCode=U_ZERO_ERROR;
2902    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2903    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2904
2905    ucnv_reset(cnv);
2906    for(;--i>0;){
2907        uSource = (UChar*) source;
2908        cTarget = cBuf;
2909        uTarget = uBuf;
2910        cSource = cBuf;
2911        cTargetLimit = cBuf;
2912        uTargetLimit = uBuf+uBufSize*5;
2913        cTargetLimit = cTargetLimit+uBufSize*10;
2914        uSourceLimit=uSource;
2915        do{
2916
2917            if (uSourceLimit < sourceLimit) {
2918                uSourceLimit = uSourceLimit+1;
2919            }
2920            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2921            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2922               errorCode=U_ZERO_ERROR;
2923                continue;
2924            }
2925
2926            if(U_FAILURE(errorCode)){
2927                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2928                return;
2929            }
2930
2931        }while (uSource<temp);
2932
2933        cSourceLimit =cBuf;
2934        do{
2935            if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2936                cSourceLimit = cSourceLimit+1;
2937            }
2938            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2939            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2940               errorCode=U_ZERO_ERROR;
2941                continue;
2942            }
2943            if(U_FAILURE(errorCode)){
2944                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2945                    return;
2946            }
2947        }while(cSource<cTarget);
2948
2949        uSource = source;
2950        /*test =uBuf;*/
2951        for(;len<(int)(source - sourceLimit);len++){
2952            if(uBuf[len]!=uSource[len]){
2953                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2954            }
2955        }
2956    }
2957    free(uBuf);
2958    free(cBuf);
2959}
2960static void
2961TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2962                     const uint16_t results[], const char* message){
2963/*     const char* s0; */
2964     const char* s=(char*)source;
2965     const uint16_t *r=results;
2966     UErrorCode errorCode=U_ZERO_ERROR;
2967     uint32_t c,exC;
2968     ucnv_reset(cnv);
2969     while(s<limit) {
2970	 /* s0=s; */
2971        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2972        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2973            break; /* no more significant input */
2974        } else if(U_FAILURE(errorCode)) {
2975            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2976            break;
2977        } else {
2978            if(U16_IS_LEAD(*r)){
2979                int i =0, len = 2;
2980                U16_NEXT(r, i, len, exC);
2981                r++;
2982            }else{
2983                exC = *r;
2984            }
2985            if(c!=(uint32_t)(exC))
2986                log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
2987        }
2988        r++;
2989    }
2990}
2991
2992static int TestJitterbug930(const char* enc){
2993    UErrorCode err = U_ZERO_ERROR;
2994    UConverter*converter;
2995    char out[80];
2996    char*target = out;
2997    UChar in[4];
2998    const UChar*source = in;
2999    int32_t off[80];
3000    int32_t* offsets = off;
3001    int numOffWritten=0;
3002    UBool flush = 0;
3003    converter = my_ucnv_open(enc, &err);
3004
3005    in[0] = 0x41;     /* 0x4E00;*/
3006    in[1] = 0x4E01;
3007    in[2] = 0x4E02;
3008    in[3] = 0x4E03;
3009
3010    memset(off, '*', sizeof(off));
3011
3012    ucnv_fromUnicode (converter,
3013            &target,
3014            target+2,
3015            &source,
3016            source+3,
3017            offsets,
3018            flush,
3019            &err);
3020
3021        /* writes three bytes into the output buffer: 41 1B 24
3022        * but offsets contains 0 1 1
3023    */
3024    while(*offsets< off[10]){
3025        numOffWritten++;
3026        offsets++;
3027    }
3028    log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3029    if(numOffWritten!= (int)(target-out)){
3030        log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3031    }
3032
3033    err = U_ZERO_ERROR;
3034
3035    memset(off,'*' , sizeof(off));
3036
3037    flush = 1;
3038    offsets=off;
3039    ucnv_fromUnicode (converter,
3040            &target,
3041            target+4,
3042            &source,
3043            source,
3044            offsets,
3045            flush,
3046            &err);
3047    numOffWritten=0;
3048    while(*offsets< off[10]){
3049        numOffWritten++;
3050        if(*offsets!= -1){
3051            log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3052        }
3053        offsets++;
3054    }
3055
3056    /* writes 42 43 7A into output buffer,
3057     * offsets contains -1 -1 -1
3058     */
3059    ucnv_close(converter);
3060    return 0;
3061}
3062
3063static void
3064TestHZ() {
3065    /* test input */
3066    static const uint16_t in[]={
3067            0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3068            0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3069            0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3070            0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3071            0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3072            0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3073            0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3074            0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3075            0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3076            0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3077            0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3078            0x005A, 0x005B, 0x005C, 0x000A
3079      };
3080    const UChar* uSource;
3081    const UChar* uSourceLimit;
3082    const char* cSource;
3083    const char* cSourceLimit;
3084    UChar *uTargetLimit =NULL;
3085    UChar *uTarget;
3086    char *cTarget;
3087    const char *cTargetLimit;
3088    char *cBuf;
3089    UChar *uBuf,*test;
3090    int32_t uBufSize = 120;
3091    UErrorCode errorCode=U_ZERO_ERROR;
3092    UConverter *cnv;
3093    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3094    int32_t* myOff= offsets;
3095    cnv=ucnv_open("HZ", &errorCode);
3096    if(U_FAILURE(errorCode)) {
3097        log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3098        return;
3099    }
3100
3101    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3102    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3103    uSource = (const UChar*)in;
3104    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3105    cTarget = cBuf;
3106    cTargetLimit = cBuf +uBufSize*5;
3107    uTarget = uBuf;
3108    uTargetLimit = uBuf+ uBufSize*5;
3109    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3110    if(U_FAILURE(errorCode)){
3111        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3112        return;
3113    }
3114    cSource = cBuf;
3115    cSourceLimit =cTarget;
3116    test =uBuf;
3117    myOff=offsets;
3118    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3119    if(U_FAILURE(errorCode)){
3120        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3121        return;
3122    }
3123    uSource = (const UChar*)in;
3124    while(uSource<uSourceLimit){
3125        if(*test!=*uSource){
3126
3127            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3128        }
3129        uSource++;
3130        test++;
3131    }
3132    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3133    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3134    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3135    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3136    TestJitterbug930("csISO2022JP");
3137    ucnv_close(cnv);
3138    free(offsets);
3139    free(uBuf);
3140    free(cBuf);
3141}
3142
/*
 * ISCII conversion test built from two parallel tables: `in` holds UTF-16
 * code units and `byteArr` holds the corresponding ISCII bytes, section by
 * section (the section comments in both tables line up).
 *
 * The tables are handed to two helpers defined elsewhere in the test suite:
 *   - testConvertToU() receives byteArr as input and `in` as the second
 *     buffer, with converter name "x-iscii-de";
 *   - TestConv() receives `in` first and byteArr last, with the names
 *     "ISCII,version=0" and "hindi".
 * NOTE(review): presumably the first checks bytes -> Unicode and the second
 * the Unicode -> bytes direction; confirm against the helper definitions.
 */
static void
TestISCII(){
        /* test input */
    static const uint16_t in[]={
        /* test full range of Devanagari */
        0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
        0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
        0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
        0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
        0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
        0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
        0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
        0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
        0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
        0x096D,0x096E,0x096F,
        /* test Soft halant*/
        0x0915,0x094d, 0x200D,
        /* test explicit halant */
        0x0915,0x094d, 0x200c,
        /* test double danda */
        0x965,
        /* test ASCII */
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        /* tests from Lotus */
        0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
        0x0930,0x094D,0x200D,
        0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
        0x0915,0x0921,0x002B,0x095F,
        /* tamil range */
        0x0B86, 0xB87, 0xB88,
        /* telugu range */
        0x0C05, 0x0C02, 0x0C03,0x0c31,
        /* kannada range */
        0x0C85, 0xC82, 0x0C83,
        /* test Abbr sign and Anudatta */
        0x0970, 0x952,
       /* 0x0958,
        0x0959,
        0x095A,
        0x095B,
        0x095C,
        0x095D,
        0x095E,
        0x095F,*/
        0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
        0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
        0x090C ,
        0x0962,
        0x0961 /* Vocallic LL 0xa6, 0xE9 */,
        0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
        0x0950 /* OM Symbol 0xa1, 0xE9,*/,
        0x093D /* Avagraha  0xEA, 0xE9*/,
        0x0958,
        0x0959,
        0x095A,
        0x095B,
        0x095C,
        0x095D,
        0x095E,
        0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
      };
    /* ISCII byte sequence paired with `in`; the sections follow the same order. */
    static const unsigned char byteArr[]={

        0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
        0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
        0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
        0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
        0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
        0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
        0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
        0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
        0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
        0xf8,0xf9,0xfa,
        /* test soft halant */
        0xb3, 0xE8, 0xE9,
        /* test explicit halant */
        0xb3, 0xE8, 0xE8,
        /* test double danda */
        0xea, 0xea,
        /* test ASCII */
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        /* test ATR code */

        /* tests from Lotus */
        0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
        0xEF,0x42,0xCF,0xE8,0xD9,
        0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
        0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
        /* tamil range */
        0xEF, 0x44, 0xa5, 0xa6, 0xa7,
        /* telugu range */
        0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
        /* kannada range */
        0xEF, 0x48,0xa4, 0xa2, 0xa3,
        /* anudatta and abbreviation sign */
        0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,


        0xAA, 0xE9,/* RI + NUKTA 0x0960*/

        0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/

        0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/

        0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/

        0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/

        0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/

        0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/

        0xEA, 0xE9, /* Danda + Nukta 0x093D*/

        0xB3, 0xE9, /* Ka + NUKTA */

        0xB4, 0xE9, /* Kha + NUKTA */

        0xB5, 0xE9, /* Ga + NUKTA */

        0xBA, 0xE9,

        0xBF, 0xE9,

        0xC0, 0xE9,

        0xC9, 0xE9,
        /* INV halant RA    */
        0xD9, 0xE8, 0xCF,
        0x00, 0x00A0,
        /* just consume unhandled codepoints */
        0xEF, 0x30,

    };
    /* Lengths: byteArr is counted in bytes, `in` in UTF-16 code units
     * (sizeof divided by U_SIZEOF_UCHAR in one call and by 2 in the other). */
    testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
    TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));

}
3285
3286static void
3287TestISO_2022_JP() {
3288    /* test input */
3289    static const uint16_t in[]={
3290        0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3291        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3292        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3293        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3294        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3295        0x201D, 0x3014, 0x000D, 0x000A,
3296        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3297        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3298        };
3299    const UChar* uSource;
3300    const UChar* uSourceLimit;
3301    const char* cSource;
3302    const char* cSourceLimit;
3303    UChar *uTargetLimit =NULL;
3304    UChar *uTarget;
3305    char *cTarget;
3306    const char *cTargetLimit;
3307    char *cBuf;
3308    UChar *uBuf,*test;
3309    int32_t uBufSize = 120;
3310    UErrorCode errorCode=U_ZERO_ERROR;
3311    UConverter *cnv;
3312    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3313    int32_t* myOff= offsets;
3314    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3315    if(U_FAILURE(errorCode)) {
3316        log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3317        return;
3318    }
3319
3320    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3321    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3322    uSource = (const UChar*)in;
3323    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3324    cTarget = cBuf;
3325    cTargetLimit = cBuf +uBufSize*5;
3326    uTarget = uBuf;
3327    uTargetLimit = uBuf+ uBufSize*5;
3328    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3329    if(U_FAILURE(errorCode)){
3330        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3331        return;
3332    }
3333    cSource = cBuf;
3334    cSourceLimit =cTarget;
3335    test =uBuf;
3336    myOff=offsets;
3337    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3338    if(U_FAILURE(errorCode)){
3339        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3340        return;
3341    }
3342
3343    uSource = (const UChar*)in;
3344    while(uSource<uSourceLimit){
3345        if(*test!=*uSource){
3346
3347            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3348        }
3349        uSource++;
3350        test++;
3351    }
3352
3353    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3354    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3355    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3356    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3357    TestJitterbug930("csISO2022JP");
3358    ucnv_close(cnv);
3359    free(uBuf);
3360    free(cBuf);
3361    free(offsets);
3362}
3363
3364static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3365    const UChar* uSource;
3366    const UChar* uSourceLimit;
3367    const char* cSource;
3368    const char* cSourceLimit;
3369    UChar *uTargetLimit =NULL;
3370    UChar *uTarget;
3371    char *cTarget;
3372    const char *cTargetLimit;
3373    char *cBuf;
3374    UChar *uBuf,*test;
3375    int32_t uBufSize = 120*10;
3376    UErrorCode errorCode=U_ZERO_ERROR;
3377    UConverter *cnv;
3378    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3379    int32_t* myOff= offsets;
3380    cnv=my_ucnv_open(conv, &errorCode);
3381    if(U_FAILURE(errorCode)) {
3382        log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3383        return;
3384    }
3385
3386    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3387    cBuf =(char*)malloc(uBufSize * sizeof(char));
3388    uSource = (const UChar*)in;
3389    uSourceLimit=uSource+len;
3390    cTarget = cBuf;
3391    cTargetLimit = cBuf +uBufSize;
3392    uTarget = uBuf;
3393    uTargetLimit = uBuf+ uBufSize;
3394    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3395    if(U_FAILURE(errorCode)){
3396        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3397        return;
3398    }
3399    /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3400    cSource = cBuf;
3401    cSourceLimit =cTarget;
3402    test =uBuf;
3403    myOff=offsets;
3404    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3405    if(U_FAILURE(errorCode)){
3406        log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3407        return;
3408    }
3409
3410    uSource = (const UChar*)in;
3411    while(uSource<uSourceLimit){
3412        if(*test!=*uSource){
3413            log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3414        }
3415        uSource++;
3416        test++;
3417    }
3418    TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3419    TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3420    TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3421    if(byteArr && byteArrLen!=0){
3422        TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3423        TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3424        {
3425            cSource = byteArr;
3426            cSourceLimit = cSource+byteArrLen;
3427            test=uBuf;
3428            myOff = offsets;
3429            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3430            if(U_FAILURE(errorCode)){
3431                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3432                return;
3433            }
3434
3435            uSource = (const UChar*)in;
3436            while(uSource<uSourceLimit){
3437                if(*test!=*uSource){
3438                    log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3439                }
3440                uSource++;
3441                test++;
3442            }
3443        }
3444    }
3445
3446    ucnv_close(cnv);
3447    free(uBuf);
3448    free(cBuf);
3449    free(offsets);
3450}
3451static UChar U_CALLCONV
3452_charAt(int32_t offset, void *context) {
3453    return ((char*)context)[offset];
3454}
3455
3456static int32_t
3457unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3458    int32_t srcIndex=0;
3459    int32_t dstIndex=0;
3460    if(U_FAILURE(*status)){
3461        return 0;
3462    }
3463    if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3464        *status = U_ILLEGAL_ARGUMENT_ERROR;
3465        return 0;
3466    }
3467    if(srcLen==-1){
3468        srcLen = (int32_t)uprv_strlen(src);
3469    }
3470
3471    for (; srcIndex<srcLen; ) {
3472        UChar32 c = src[srcIndex++];
3473        if (c == 0x005C /*'\\'*/) {
3474            c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3475            if (c == (UChar32)0xFFFFFFFF) {
3476                *status=U_INVALID_CHAR_FOUND; /* return empty string */
3477                break; /* invalid escape sequence */
3478            }
3479        }
3480        if(dstIndex < dstLen){
3481            if(c>0xFFFF){
3482               dst[dstIndex++] = U16_LEAD(c);
3483               if(dstIndex<dstLen){
3484                    dst[dstIndex]=U16_TRAIL(c);
3485               }else{
3486                   *status=U_BUFFER_OVERFLOW_ERROR;
3487               }
3488            }else{
3489                dst[dstIndex]=(UChar)c;
3490            }
3491
3492        }else{
3493            *status = U_BUFFER_OVERFLOW_ERROR;
3494        }
3495        dstIndex++; /* for preflighting */
3496    }
3497    return dstIndex;
3498}
3499
3500static void
3501TestFullRoundtrip(const char* cp){
3502    UChar usource[10] ={0};
3503    UChar nsrc[10] = {0};
3504    uint32_t i=1;
3505    int len=0, ulen;
3506    nsrc[0]=0x0061;
3507    /* Test codepoint 0 */
3508    TestConv(usource,1,cp,"",NULL,0);
3509    TestConv(usource,2,cp,"",NULL,0);
3510    nsrc[2]=0x5555;
3511    TestConv(nsrc,3,cp,"",NULL,0);
3512
3513    for(;i<=0x10FFFF;i++){
3514        if(i==0xD800){
3515            i=0xDFFF;
3516            continue;
3517        }
3518        if(i<=0xFFFF){
3519            usource[0] =(UChar) i;
3520            len=1;
3521        }else{
3522            usource[0]=U16_LEAD(i);
3523            usource[1]=U16_TRAIL(i);
3524            len=2;
3525        }
3526        ulen=len;
3527        if(i==0x80) {
3528            usource[2]=0;
3529        }
3530        /* Test only single code points */
3531        TestConv(usource,ulen,cp,"",NULL,0);
3532        /* Test codepoint repeated twice */
3533        usource[ulen]=usource[0];
3534        usource[ulen+1]=usource[1];
3535        ulen+=len;
3536        TestConv(usource,ulen,cp,"",NULL,0);
3537        /* Test codepoint repeated 3 times */
3538        usource[ulen]=usource[0];
3539        usource[ulen+1]=usource[1];
3540        ulen+=len;
3541        TestConv(usource,ulen,cp,"",NULL,0);
3542        /* Test codepoint in between 2 codepoints */
3543        nsrc[1]=usource[0];
3544        nsrc[2]=usource[1];
3545        nsrc[len+1]=0x5555;
3546        TestConv(nsrc,len+2,cp,"",NULL,0);
3547        uprv_memset(usource,0,sizeof(UChar)*10);
3548    }
3549}
3550
3551static void
3552TestRoundTrippingAllUTF(void){
3553    if(!getTestOption(QUICK_OPTION)){
3554        log_verbose("Running exhaustive round trip test for BOCU-1\n");
3555        TestFullRoundtrip("BOCU-1");
3556        log_verbose("Running exhaustive round trip test for SCSU\n");
3557        TestFullRoundtrip("SCSU");
3558        log_verbose("Running exhaustive round trip test for UTF-8\n");
3559        TestFullRoundtrip("UTF-8");
3560        log_verbose("Running exhaustive round trip test for CESU-8\n");
3561        TestFullRoundtrip("CESU-8");
3562        log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3563        TestFullRoundtrip("UTF-16BE");
3564        log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3565        TestFullRoundtrip("UTF-16LE");
3566        log_verbose("Running exhaustive round trip test for UTF-16\n");
3567        TestFullRoundtrip("UTF-16");
3568        log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3569        TestFullRoundtrip("UTF-32BE");
3570        log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3571        TestFullRoundtrip("UTF-32LE");
3572        log_verbose("Running exhaustive round trip test for UTF-32\n");
3573        TestFullRoundtrip("UTF-32");
3574        log_verbose("Running exhaustive round trip test for UTF-7\n");
3575        TestFullRoundtrip("UTF-7");
3576        log_verbose("Running exhaustive round trip test for UTF-7\n");
3577        TestFullRoundtrip("UTF-7,version=1");
3578        log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3579        TestFullRoundtrip("IMAP-mailbox-name");
3580        /*
3581         *
3582         * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3583         * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3584         * The old mappings remain as fallbacks.
3585         * This test may be reintroduced at a later time.
3586         *
3587         * 110118 - mow
3588         */
3589         /*
3590         log_verbose("Running exhaustive round trip test for GB18030\n");
3591         TestFullRoundtrip("GB18030");
3592         */
3593    }
3594}
3595
3596static void
3597TestSCSU() {
3598
3599    static const uint16_t germanUTF16[]={
3600        0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3601    };
3602
3603    static const uint8_t germanSCSU[]={
3604        0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3605    };
3606
3607    static const uint16_t russianUTF16[]={
3608        0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3609    };
3610
3611    static const uint8_t russianSCSU[]={
3612        0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3613    };
3614
3615    static const uint16_t japaneseUTF16[]={
3616        0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3617        0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3618        0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3619        0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3620        0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3621        0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3622        0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3623        0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3624        0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3625        0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3626        0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3627        0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3628        0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3629        0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3630        0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3631    };
3632
3633    /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3634     it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3635    static const uint8_t japaneseSCSU[]={
3636        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3637        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3638        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3639        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3640        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3641        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3642        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3643        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3644        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3645        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3646        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3647        0xcb, 0x82
3648    };
3649
3650    static const uint16_t allFeaturesUTF16[]={
3651        0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3652        0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3653        0x01df, 0xf000, 0xdbff, 0xdfff
3654    };
3655
3656    /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3657     * result here (34B vs. 35B)
3658     */
3659    static const uint8_t allFeaturesSCSU[]={
3660        0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3661        0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3662        0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3663        0xdf, 0x14, 0x80, 0x15, 0xff
3664    };
3665    static const uint16_t monkeyIn[]={
3666        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3667        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3668        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3669        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3670        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3671        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3672        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3673        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3674        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3675        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3676        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3677        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3678        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3679        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3680        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3681        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3682        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3683        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3684        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3685        /* test non-BMP code points */
3686        0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3687        0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3688        0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3689        0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3690        0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3691        0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3692        0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3693        0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3694        0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3695        0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3696        0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3697
3698
3699        0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3700        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3701        0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3702        0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3703        0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3704    };
3705    static const char *fTestCases [] = {
3706          "\\ud800\\udc00", /* smallest surrogate*/
3707          "\\ud8ff\\udcff",
3708          "\\udBff\\udFff", /* largest surrogate pair*/
3709          "\\ud834\\udc00",
3710          "\\U0010FFFF",
3711          "Hello \\u9292 \\u9192 World!",
3712          "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3713          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3714
3715          "\\u0648\\u06c8", /* catch missing reset*/
3716          "\\u0648\\u06c8",
3717
3718          "\\u4444\\uE001", /* lowest quotable*/
3719          "\\u4444\\uf2FF", /* highest quotable*/
3720          "\\u4444\\uf188\\u4444",
3721          "\\u4444\\uf188\\uf288",
3722          "\\u4444\\uf188abc\\u0429\\uf288",
3723          "\\u9292\\u2222",
3724          "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3725          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3726          "Hello World!123456",
3727          "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3728
3729          "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3730          "abc\\u4411d",      /* uses SQU*/
3731          "abc\\u4411\\u4412d",/* uses SCU*/
3732          "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3733          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3734          "\\u9292\\u2222",
3735          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3736          "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3737          "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3738
3739          "", /* empty input*/
3740          "\\u0000", /* smallest BMP character*/
3741          "\\uFFFF", /* largest BMP character*/
3742
3743          /* regression tests*/
3744          "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3745          "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3746          "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3747          "\\u0041\\u00df\\u0401\\u015f",
3748          "\\u9066\\u2123abc",
3749          "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3750          "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3751    };
3752    int i=0;
3753    for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
3754        const char* cSrc = fTestCases[i];
3755        UErrorCode status = U_ZERO_ERROR;
3756        int32_t cSrcLen,srcLen;
3757        UChar* src;
3758        /* UConverter* cnv = ucnv_open("SCSU",&status); */
3759        cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3760        src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3761        srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3762        log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3763        TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3764        free(src);
3765    }
3766    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3767    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3768    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3769    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3770    TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3771    TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3772    TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3773}
3774
3775#if !UCONFIG_NO_LEGACY_CONVERSION
3776static void TestJitterbug2346(){
3777    char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3778                      0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3779    uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3780
3781    UChar uTarget[500]={'\0'};
3782    UChar* utarget=uTarget;
3783    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3784
3785    char cTarget[500]={'\0'};
3786    char* ctarget=cTarget;
3787    char* ctargetLimit=cTarget+sizeof(cTarget);
3788    const char* csource=source;
3789    UChar* temp = expected;
3790    UErrorCode err=U_ZERO_ERROR;
3791
3792    UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3793    if(U_FAILURE(err)) {
3794        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3795        return;
3796    }
3797    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3798    if(U_FAILURE(err)) {
3799        log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3800        return;
3801    }
3802    utargetLimit=utarget;
3803    utarget = uTarget;
3804    while(utarget<utargetLimit){
3805        if(*temp!=*utarget){
3806
3807            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3808        }
3809        utarget++;
3810        temp++;
3811    }
3812    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3813    if(U_FAILURE(err)) {
3814        log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3815        return;
3816    }
3817    ctargetLimit=ctarget;
3818    ctarget =cTarget;
3819    ucnv_close(conv);
3820
3821
3822}
3823
3824static void
3825TestISO_2022_JP_1() {
3826    /* test input */
3827    static const uint16_t in[]={
3828        0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3829        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3830        0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3831        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3832        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3833        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3834        0x201D, 0x000D, 0x000A,
3835        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3836        0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3837        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3838        0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3839        0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3840        0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3841      };
3842    const UChar* uSource;
3843    const UChar* uSourceLimit;
3844    const char* cSource;
3845    const char* cSourceLimit;
3846    UChar *uTargetLimit =NULL;
3847    UChar *uTarget;
3848    char *cTarget;
3849    const char *cTargetLimit;
3850    char *cBuf;
3851    UChar *uBuf,*test;
3852    int32_t uBufSize = 120;
3853    UErrorCode errorCode=U_ZERO_ERROR;
3854    UConverter *cnv;
3855
3856    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3857    if(U_FAILURE(errorCode)) {
3858        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3859        return;
3860    }
3861
3862    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3863    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3864    uSource = (const UChar*)in;
3865    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3866    cTarget = cBuf;
3867    cTargetLimit = cBuf +uBufSize*5;
3868    uTarget = uBuf;
3869    uTargetLimit = uBuf+ uBufSize*5;
3870    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3871    if(U_FAILURE(errorCode)){
3872        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3873        return;
3874    }
3875    cSource = cBuf;
3876    cSourceLimit =cTarget;
3877    test =uBuf;
3878    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3879    if(U_FAILURE(errorCode)){
3880        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3881        return;
3882    }
3883    uSource = (const UChar*)in;
3884    while(uSource<uSourceLimit){
3885        if(*test!=*uSource){
3886
3887            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3888        }
3889        uSource++;
3890        test++;
3891    }
3892    /*ucnv_close(cnv);
3893    cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3894    /*Test for the condition where there is an invalid character*/
3895    ucnv_reset(cnv);
3896    {
3897        static const uint8_t source2[]={0x0e,0x24,0x053};
3898        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3899    }
3900    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3901    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3902    ucnv_close(cnv);
3903    free(uBuf);
3904    free(cBuf);
3905}
3906
3907static void
3908TestISO_2022_JP_2() {
3909    /* test input */
3910    static const uint16_t in[]={
3911        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3912        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3913        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3914        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3915        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3916        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3917        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3918        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3919        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3920        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3921        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3922        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3923        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3924        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3925        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3926        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3927        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3928        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3929        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3930      };
3931    const UChar* uSource;
3932    const UChar* uSourceLimit;
3933    const char* cSource;
3934    const char* cSourceLimit;
3935    UChar *uTargetLimit =NULL;
3936    UChar *uTarget;
3937    char *cTarget;
3938    const char *cTargetLimit;
3939    char *cBuf;
3940    UChar *uBuf,*test;
3941    int32_t uBufSize = 120;
3942    UErrorCode errorCode=U_ZERO_ERROR;
3943    UConverter *cnv;
3944    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3945    int32_t* myOff= offsets;
3946    cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3947    if(U_FAILURE(errorCode)) {
3948        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3949        return;
3950    }
3951
3952    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3953    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3954    uSource = (const UChar*)in;
3955    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3956    cTarget = cBuf;
3957    cTargetLimit = cBuf +uBufSize*5;
3958    uTarget = uBuf;
3959    uTargetLimit = uBuf+ uBufSize*5;
3960    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3961    if(U_FAILURE(errorCode)){
3962        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3963        return;
3964    }
3965    cSource = cBuf;
3966    cSourceLimit =cTarget;
3967    test =uBuf;
3968    myOff=offsets;
3969    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3970    if(U_FAILURE(errorCode)){
3971        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3972        return;
3973    }
3974    uSource = (const UChar*)in;
3975    while(uSource<uSourceLimit){
3976        if(*test!=*uSource){
3977
3978            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3979        }
3980        uSource++;
3981        test++;
3982    }
3983    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3984    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3985    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3986    /*Test for the condition where there is an invalid character*/
3987    ucnv_reset(cnv);
3988    {
3989        static const uint8_t source2[]={0x0e,0x24,0x053};
3990        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3991    }
3992    ucnv_close(cnv);
3993    free(uBuf);
3994    free(cBuf);
3995    free(offsets);
3996}
3997
3998static void
3999TestISO_2022_KR() {
4000    /* test input */
4001    static const uint16_t in[]={
4002                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4003                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4004                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4005                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4006                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4007                   ,0x53E3,0x53E4,0x000A,0x000D};
4008    const UChar* uSource;
4009    const UChar* uSourceLimit;
4010    const char* cSource;
4011    const char* cSourceLimit;
4012    UChar *uTargetLimit =NULL;
4013    UChar *uTarget;
4014    char *cTarget;
4015    const char *cTargetLimit;
4016    char *cBuf;
4017    UChar *uBuf,*test;
4018    int32_t uBufSize = 120;
4019    UErrorCode errorCode=U_ZERO_ERROR;
4020    UConverter *cnv;
4021    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4022    int32_t* myOff= offsets;
4023    cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4024    if(U_FAILURE(errorCode)) {
4025        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4026        return;
4027    }
4028
4029    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4030    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4031    uSource = (const UChar*)in;
4032    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4033    cTarget = cBuf;
4034    cTargetLimit = cBuf +uBufSize*5;
4035    uTarget = uBuf;
4036    uTargetLimit = uBuf+ uBufSize*5;
4037    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4038    if(U_FAILURE(errorCode)){
4039        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4040        return;
4041    }
4042    cSource = cBuf;
4043    cSourceLimit =cTarget;
4044    test =uBuf;
4045    myOff=offsets;
4046    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4047    if(U_FAILURE(errorCode)){
4048        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4049        return;
4050    }
4051    uSource = (const UChar*)in;
4052    while(uSource<uSourceLimit){
4053        if(*test!=*uSource){
4054            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4055        }
4056        uSource++;
4057        test++;
4058    }
4059    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4060    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4061    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4062    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4063    TestJitterbug930("csISO2022KR");
4064    /*Test for the condition where there is an invalid character*/
4065    ucnv_reset(cnv);
4066    {
4067        static const uint8_t source2[]={0x1b,0x24,0x053};
4068        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4069        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4070    }
4071    ucnv_close(cnv);
4072    free(uBuf);
4073    free(cBuf);
4074    free(offsets);
4075}
4076
4077static void
4078TestISO_2022_KR_1() {
4079    /* test input */
4080    static const uint16_t in[]={
4081                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4082                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4083                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4084                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4085                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4086                   ,0x53E3,0x53E4,0x000A,0x000D};
4087    const UChar* uSource;
4088    const UChar* uSourceLimit;
4089    const char* cSource;
4090    const char* cSourceLimit;
4091    UChar *uTargetLimit =NULL;
4092    UChar *uTarget;
4093    char *cTarget;
4094    const char *cTargetLimit;
4095    char *cBuf;
4096    UChar *uBuf,*test;
4097    int32_t uBufSize = 120;
4098    UErrorCode errorCode=U_ZERO_ERROR;
4099    UConverter *cnv;
4100    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4101    int32_t* myOff= offsets;
4102    cnv=ucnv_open("ibm-25546", &errorCode);
4103    if(U_FAILURE(errorCode)) {
4104        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4105        return;
4106    }
4107
4108    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4109    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4110    uSource = (const UChar*)in;
4111    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4112    cTarget = cBuf;
4113    cTargetLimit = cBuf +uBufSize*5;
4114    uTarget = uBuf;
4115    uTargetLimit = uBuf+ uBufSize*5;
4116    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4117    if(U_FAILURE(errorCode)){
4118        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4119        return;
4120    }
4121    cSource = cBuf;
4122    cSourceLimit =cTarget;
4123    test =uBuf;
4124    myOff=offsets;
4125    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4126    if(U_FAILURE(errorCode)){
4127        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4128        return;
4129    }
4130    uSource = (const UChar*)in;
4131    while(uSource<uSourceLimit){
4132        if(*test!=*uSource){
4133            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4134        }
4135        uSource++;
4136        test++;
4137    }
4138    ucnv_reset(cnv);
4139    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4140    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4141    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4142    ucnv_reset(cnv);
4143    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4144        /*Test for the condition where there is an invalid character*/
4145    ucnv_reset(cnv);
4146    {
4147        static const uint8_t source2[]={0x1b,0x24,0x053};
4148        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4149        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4150    }
4151    ucnv_close(cnv);
4152    free(uBuf);
4153    free(cBuf);
4154    free(offsets);
4155}
4156
4157static void TestJitterbug2411(){
4158    static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4159                         "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4160    UConverter* kr=NULL, *kr1=NULL;
4161    UErrorCode errorCode = U_ZERO_ERROR;
4162    UChar tgt[100]={'\0'};
4163    UChar* target = tgt;
4164    UChar* targetLimit = target+100;
4165    kr=ucnv_open("iso-2022-kr", &errorCode);
4166    if(U_FAILURE(errorCode)) {
4167        log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4168        return;
4169    }
4170    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4171    if(U_FAILURE(errorCode)) {
4172        log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4173        return;
4174    }
4175    kr1 = ucnv_open("ibm-25546", &errorCode);
4176    if(U_FAILURE(errorCode)) {
4177        log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4178        return;
4179    }
4180    target = tgt;
4181    targetLimit = target+100;
4182    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4183
4184    if(U_FAILURE(errorCode)) {
4185        log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4186        return;
4187    }
4188
4189    ucnv_close(kr);
4190    ucnv_close(kr1);
4191
4192}
4193
4194static void
4195TestJIS(){
4196    /* From Unicode moved to testdata/conversion.txt */
4197    /*To Unicode*/
4198    {
4199        static const uint8_t sampleTextJIS[] = {
4200            0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4201            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4202            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4203        };
4204        static const uint16_t expectedISO2022JIS[] = {
4205            0x0041, 0x0042,
4206            0xFF81, 0xFF82,
4207            0x3000
4208        };
4209        static const int32_t  toISO2022JISOffs[]={
4210            3,4,
4211            8,9,
4212            16
4213        };
4214
4215        static const uint8_t sampleTextJIS7[] = {
4216            0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4217            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4218            0x1b,0x24,0x42,0x21,0x21,
4219            0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4220            0x21,0x22,
4221            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4222        };
4223        static const uint16_t expectedISO2022JIS7[] = {
4224            0x0041, 0x0042,
4225            0xFF81, 0xFF82,
4226            0x3000,
4227            0xFF81, 0xFF82,
4228            0x3001,
4229            0x3000
4230        };
4231        static const int32_t  toISO2022JIS7Offs[]={
4232            3,4,
4233            8,9,
4234            13,16,
4235            17,
4236            19,27
4237        };
4238        static const uint8_t sampleTextJIS8[] = {
4239            0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4240            0xa1,0xc8,0xd9,/*Katakana Set*/
4241            0x1b,0x28,0x42,
4242            0x41,0x42,
4243            0xb1,0xc3, /*Katakana Set*/
4244            0x1b,0x24,0x42,0x21,0x21
4245        };
4246        static const uint16_t expectedISO2022JIS8[] = {
4247            0x0041, 0x0042,
4248            0xff61, 0xff88, 0xff99,
4249            0x0041, 0x0042,
4250            0xff71, 0xff83,
4251            0x3000
4252        };
4253        static const int32_t  toISO2022JIS8Offs[]={
4254            3, 4,  5,  6,
4255            7, 11, 12, 13,
4256            14, 18,
4257        };
4258
4259        testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4260            sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4261        testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4262            sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4263        testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4264            sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4265    }
4266
4267}
4268
4269
4270#if 0
4271 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4272
4273static void TestJitterbug915(){
4274/* tests for roundtripping of the below sequence
4275\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4276\x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4277\x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4278\x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4279\x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4280\x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4281\x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4282*/
4283    static const char cSource[]={
4284        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4285        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4286        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4287        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4288        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4289        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4290        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4291        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4292        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4293        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4294        0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4295        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4296        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4297        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4298        0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4299        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4300        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4301        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4302        0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4303        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4304        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4305        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4306        0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4307        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4308        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4309        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4310        0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4311        0x37, 0x20, 0x2A, 0x2F
4312    };
4313    UChar uTarget[500]={'\0'};
4314    UChar* utarget=uTarget;
4315    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4316
4317    char cTarget[500]={'\0'};
4318    char* ctarget=cTarget;
4319    char* ctargetLimit=cTarget+sizeof(cTarget);
4320    const char* csource=cSource;
4321    const char* tempSrc = cSource;
4322    UErrorCode err=U_ZERO_ERROR;
4323
4324    UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4325    if(U_FAILURE(err)) {
4326        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4327        return;
4328    }
4329    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4330    if(U_FAILURE(err)) {
4331        log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4332        return;
4333    }
4334    utargetLimit=utarget;
4335    utarget = uTarget;
4336    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4337    if(U_FAILURE(err)) {
4338        log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4339        return;
4340    }
4341    ctargetLimit=ctarget;
4342    ctarget =cTarget;
4343    while(ctarget<ctargetLimit){
4344        if(*ctarget != *tempSrc){
4345            log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4346        }
4347        ++ctarget;
4348        ++tempSrc;
4349    }
4350
4351    ucnv_close(conv);
4352}
4353
4354static void
4355TestISO_2022_CN_EXT() {
4356    /* test input */
4357    static const uint16_t in[]={
4358                /* test Non-BMP code points */
4359         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4360         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4361         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4362         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4363         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4364         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4365         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4366         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4367         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4368         0xD869, 0xDED5,
4369
4370         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4371         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4372         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4373         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4374         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4375         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4376         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4377         0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4378         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4379         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4380         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4381         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4382         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4383         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4384         0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4385         0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4386         0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4387         0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4388
4389         0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4390
4391      };
4392
4393    const UChar* uSource;
4394    const UChar* uSourceLimit;
4395    const char* cSource;
4396    const char* cSourceLimit;
4397    UChar *uTargetLimit =NULL;
4398    UChar *uTarget;
4399    char *cTarget;
4400    const char *cTargetLimit;
4401    char *cBuf;
4402    UChar *uBuf,*test;
4403    int32_t uBufSize = 180;
4404    UErrorCode errorCode=U_ZERO_ERROR;
4405    UConverter *cnv;
4406    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4407    int32_t* myOff= offsets;
4408    cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4409    if(U_FAILURE(errorCode)) {
4410        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4411        return;
4412    }
4413
4414    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4415    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4416    uSource = (const UChar*)in;
4417    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4418    cTarget = cBuf;
4419    cTargetLimit = cBuf +uBufSize*5;
4420    uTarget = uBuf;
4421    uTargetLimit = uBuf+ uBufSize*5;
4422    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4423    if(U_FAILURE(errorCode)){
4424        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4425        return;
4426    }
4427    cSource = cBuf;
4428    cSourceLimit =cTarget;
4429    test =uBuf;
4430    myOff=offsets;
4431    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4432    if(U_FAILURE(errorCode)){
4433        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4434        return;
4435    }
4436    uSource = (const UChar*)in;
4437    while(uSource<uSourceLimit){
4438        if(*test!=*uSource){
4439            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4440        }
4441        else{
4442            log_verbose("      Got: \\u%04X\n",(int)*test) ;
4443        }
4444        uSource++;
4445        test++;
4446    }
4447    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4448    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4449    /*Test for the condition where there is an invalid character*/
4450    ucnv_reset(cnv);
4451    {
4452        static const uint8_t source2[]={0x0e,0x24,0x053};
4453        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4454    }
4455    ucnv_close(cnv);
4456    free(uBuf);
4457    free(cBuf);
4458    free(offsets);
4459}
4460#endif
4461
4462static void
4463TestISO_2022_CN() {
4464    /* test input */
4465    static const uint16_t in[]={
4466         /* jitterbug 951 */
4467         0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4468         0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4469         0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4470         0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4471         0x0020, 0x0045, 0x004e, 0x0044,
4472         /**/
4473         0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4474         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4475         0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4476         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4477         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4478         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4479         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4480         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4481         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4482         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4483         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4484         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4485         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4486         0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4487         0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4488         0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4489         0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4490
4491      };
4492    const UChar* uSource;
4493    const UChar* uSourceLimit;
4494    const char* cSource;
4495    const char* cSourceLimit;
4496    UChar *uTargetLimit =NULL;
4497    UChar *uTarget;
4498    char *cTarget;
4499    const char *cTargetLimit;
4500    char *cBuf;
4501    UChar *uBuf,*test;
4502    int32_t uBufSize = 180;
4503    UErrorCode errorCode=U_ZERO_ERROR;
4504    UConverter *cnv;
4505    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4506    int32_t* myOff= offsets;
4507    cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4508    if(U_FAILURE(errorCode)) {
4509        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4510        return;
4511    }
4512
4513    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4514    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4515    uSource = (const UChar*)in;
4516    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4517    cTarget = cBuf;
4518    cTargetLimit = cBuf +uBufSize*5;
4519    uTarget = uBuf;
4520    uTargetLimit = uBuf+ uBufSize*5;
4521    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4522    if(U_FAILURE(errorCode)){
4523        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4524        return;
4525    }
4526    cSource = cBuf;
4527    cSourceLimit =cTarget;
4528    test =uBuf;
4529    myOff=offsets;
4530    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4531    if(U_FAILURE(errorCode)){
4532        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4533        return;
4534    }
4535    uSource = (const UChar*)in;
4536    while(uSource<uSourceLimit){
4537        if(*test!=*uSource){
4538            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4539        }
4540        else{
4541            log_verbose("      Got: \\u%04X\n",(int)*test) ;
4542        }
4543        uSource++;
4544        test++;
4545    }
4546    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4547    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4548    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4549    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4550    TestJitterbug930("csISO2022CN");
4551    /*Test for the condition where there is an invalid character*/
4552    ucnv_reset(cnv);
4553    {
4554        static const uint8_t source2[]={0x0e,0x24,0x053};
4555        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4556    }
4557
4558    ucnv_close(cnv);
4559    free(uBuf);
4560    free(cBuf);
4561    free(offsets);
4562}
4563
4564/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
/* One table entry for TestJitterbug6175: a converter name plus the raw bytes to feed it. */
typedef struct EmptySegmentTest {
    const char *converterName;   /* name handed to ucnv_open(); NULL marks the end of the table */
    const char *inputText;       /* input bytes; length is given separately, not by a terminator */
    int         inputTextLength; /* number of bytes in inputText */
} EmptySegmentTest;
4570
4571/* Callback for TestJitterbug6175, should only get called for empty segment errors */
4572static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4573                                             int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4574    if (reason > UCNV_IRREGULAR) {
4575        return;
4576    }
4577    if (reason != UCNV_IRREGULAR) {
4578        log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4579    }
4580    /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4581    *err = U_ZERO_ERROR;
4582    ucnv_cbToUWriteSub(toArgs,0,err);
4583}
4584
4585enum { kEmptySegmentToUCharsMax = 64 };
4586static void TestJitterbug6175(void) {
4587    static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4588    static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4589    static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4590    static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4591    static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4592    static const EmptySegmentTest emptySegmentTests[] = {
4593        /* converterName inputText    inputTextLength */
4594        { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4595        { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4596        { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4597        { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4598        { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
4599        /* terminator: */
4600        { NULL,          NULL,        0,                  }
4601    };
4602    const EmptySegmentTest * testPtr;
4603    for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4604        UErrorCode   err = U_ZERO_ERROR;
4605        UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4606        if (U_FAILURE(err)) {
4607            log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4608            return;
4609        }
4610        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4611        if (U_FAILURE(err)) {
4612            log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4613            ucnv_close(cnv);
4614            return;
4615        }
4616        {
4617            UChar         toUChars[kEmptySegmentToUCharsMax];
4618            UChar *       toUCharsPtr = toUChars;
4619            const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4620            const char *  inCharsPtr = testPtr->inputText;
4621            const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4622            ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4623        }
4624        ucnv_close(cnv);
4625    }
4626}
4627
4628static void
4629TestEBCDIC_STATEFUL() {
4630    /* test input */
4631    static const uint8_t in[]={
4632        0x61,
4633        0x1a,
4634        0x0f, 0x4b,
4635        0x42,
4636        0x40,
4637        0x36,
4638    };
4639
4640    /* expected test results */
4641    static const int32_t results[]={
4642        /* number of bytes read, code point */
4643        1, 0x002f,
4644        1, 0x0092,
4645        2, 0x002e,
4646        1, 0xff62,
4647        1, 0x0020,
4648        1, 0x0096,
4649
4650    };
4651    static const uint8_t in2[]={
4652        0x0f,
4653        0xa1,
4654        0x01
4655    };
4656
4657    /* expected test results */
4658    static const int32_t results2[]={
4659        /* number of bytes read, code point */
4660        2, 0x203E,
4661        1, 0x0001,
4662    };
4663
4664    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4665    UErrorCode errorCode=U_ZERO_ERROR;
4666    UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4667    if(U_FAILURE(errorCode)) {
4668        log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4669        return;
4670    }
4671    TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4672    ucnv_reset(cnv);
4673     /* Test the condition when source >= sourceLimit */
4674    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4675    ucnv_reset(cnv);
4676    /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4677    {
4678        static const uint8_t source1[]={0x0f};
4679        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4680    }
4681    /*Test for the condition where there is an invalid character*/
4682    ucnv_reset(cnv);
4683    {
4684        static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4685        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4686    }
4687    ucnv_reset(cnv);
4688    source=(const char*)in2;
4689    limit=(const char*)in2+sizeof(in2);
4690    TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4691    ucnv_close(cnv);
4692
4693}
4694
4695static void
4696TestGB18030() {
4697    /* test input */
4698    static const uint8_t in[]={
4699        0x24,
4700        0x7f,
4701        0x81, 0x30, 0x81, 0x30,
4702        0xa8, 0xbf,
4703        0xa2, 0xe3,
4704        0xd2, 0xbb,
4705        0x82, 0x35, 0x8f, 0x33,
4706        0x84, 0x31, 0xa4, 0x39,
4707        0x90, 0x30, 0x81, 0x30,
4708        0xe3, 0x32, 0x9a, 0x35
4709#if 0
4710        /*
4711         * Feature removed   markus 2000-oct-26
4712         * Only some codepages must match surrogate pairs into supplementary code points -
4713         * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4714         * GB 18030 provides direct encodings for supplementary code points, therefore
4715         * it must not combine two single-encoded surrogates into one code point.
4716         */
4717        0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4718#endif
4719    };
4720
4721    /* expected test results */
4722    static const int32_t results[]={
4723        /* number of bytes read, code point */
4724        1, 0x24,
4725        1, 0x7f,
4726        4, 0x80,
4727        2, 0x1f9,
4728        2, 0x20ac,
4729        2, 0x4e00,
4730        4, 0x9fa6,
4731        4, 0xffff,
4732        4, 0x10000,
4733        4, 0x10ffff
4734#if 0
4735        /* Feature removed. See comment above. */
4736        8, 0x10000
4737#endif
4738    };
4739
4740/*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4741    UErrorCode errorCode=U_ZERO_ERROR;
4742    UConverter *cnv=ucnv_open("gb18030", &errorCode);
4743    if(U_FAILURE(errorCode)) {
4744        log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4745        return;
4746    }
4747    TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4748    ucnv_close(cnv);
4749}
4750
4751static void
4752TestLMBCS() {
4753    /* LMBCS-1 string */
4754    static const uint8_t pszLMBCS[]={
4755        0x61,
4756        0x01, 0x29,
4757        0x81,
4758        0xA0,
4759        0x0F, 0x27,
4760        0x0F, 0x91,
4761        0x14, 0x0a, 0x74,
4762        0x14, 0xF6, 0x02,
4763        0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4764        0x10, 0x88, 0xA0,
4765    };
4766
4767    /* Unicode UChar32 equivalents */
4768    static const UChar32 pszUnicode32[]={
4769        /* code point */
4770        0x00000061,
4771        0x00002013,
4772        0x000000FC,
4773        0x000000E1,
4774        0x00000007,
4775        0x00000091,
4776        0x00000a74,
4777        0x00000200,
4778        0x00023456, /* code point for surrogate pair */
4779        0x00005516
4780    };
4781
4782/* Unicode UChar equivalents */
4783    static const UChar pszUnicode[]={
4784        /* code point */
4785        0x0061,
4786        0x2013,
4787        0x00FC,
4788        0x00E1,
4789        0x0007,
4790        0x0091,
4791        0x0a74,
4792        0x0200,
4793        0xD84D, /* low surrogate */
4794        0xDC56, /* high surrogate */
4795        0x5516
4796    };
4797
4798/* expected test results */
4799    static const int offsets32[]={
4800        /* number of bytes read, code point */
4801        0,
4802        1,
4803        3,
4804        4,
4805        5,
4806        7,
4807        9,
4808        12,
4809        15,
4810        21,
4811        24
4812    };
4813
4814/* expected test results */
4815    static const int offsets[]={
4816        /* number of bytes read, code point */
4817        0,
4818        1,
4819        3,
4820        4,
4821        5,
4822        7,
4823        9,
4824        12,
4825        15,
4826        18,
4827        21,
4828        24
4829    };
4830
4831
4832    UConverter *cnv;
4833
4834#define NAME_LMBCS_1 "LMBCS-1"
4835#define NAME_LMBCS_2 "LMBCS-2"
4836
4837
4838   /* Some basic open/close/property tests on some LMBCS converters */
4839    {
4840
4841      char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4842      char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4843      char get_subchars [1];
4844      const char * get_name;
4845      UConverter *cnv1;
4846      UConverter *cnv2;
4847
4848      int8_t len = sizeof(get_subchars);
4849
4850      UErrorCode errorCode=U_ZERO_ERROR;
4851
4852      /* Open */
4853      cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4854      if(U_FAILURE(errorCode)) {
4855         log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4856         return;
4857      }
4858      cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4859      if(U_FAILURE(errorCode)) {
4860         log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4861         return;
4862      }
4863
4864      /* Name */
4865      get_name = ucnv_getName (cnv1, &errorCode);
4866      if (strcmp(NAME_LMBCS_1,get_name)){
4867         log_err("Unexpected converter name: %s\n", get_name);
4868      }
4869      get_name = ucnv_getName (cnv2, &errorCode);
4870      if (strcmp(NAME_LMBCS_2,get_name)){
4871         log_err("Unexpected converter name: %s\n", get_name);
4872      }
4873
4874      /* substitution chars */
4875      ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4876      if(U_FAILURE(errorCode)) {
4877         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4878      }
4879      if (len!=1){
4880         log_err("Unexpected length of sub chars\n");
4881      }
4882      if (get_subchars[0] != expected_subchars[0]){
4883           log_err("Unexpected value of sub chars\n");
4884      }
4885      ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4886      if(U_FAILURE(errorCode)) {
4887         log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4888      }
4889      ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4890      if(U_FAILURE(errorCode)) {
4891         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4892      }
4893      if (len!=1){
4894         log_err("Unexpected length of sub chars\n");
4895      }
4896      if (get_subchars[0] != new_subchars[0]){
4897           log_err("Unexpected value of sub chars\n");
4898      }
4899      ucnv_close(cnv1);
4900      ucnv_close(cnv2);
4901
4902    }
4903
4904    /* LMBCS to Unicode - offsets */
4905    {
4906       UErrorCode errorCode=U_ZERO_ERROR;
4907
4908       const char * pSource = (const char *)pszLMBCS;
4909       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4910
4911       UChar Out [sizeof(pszUnicode) + 1];
4912       UChar * pOut = Out;
4913       UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4914
4915       int32_t off [sizeof(offsets)];
4916
4917      /* last 'offset' in expected results is just the final size.
4918         (Makes other tests easier). Compensate here: */
4919
4920       off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4921
4922
4923
4924      cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4925      if(U_FAILURE(errorCode)) {
4926           log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4927           return;
4928      }
4929
4930
4931
4932      ucnv_toUnicode (cnv,
4933                      &pOut,
4934                      OutLimit,
4935                      &pSource,
4936                      sourceLimit,
4937                      off,
4938                      TRUE,
4939                      &errorCode);
4940
4941
4942       if (memcmp(off,offsets,sizeof(offsets)))
4943       {
4944         log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4945       }
4946       if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4947       {
4948         log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4949       }
4950       ucnv_close(cnv);
4951    }
4952    {
4953   /* LMBCS to Unicode - getNextUChar */
4954      const char * sourceStart;
4955      const char *source=(const char *)pszLMBCS;
4956      const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4957      const UChar32 *results= pszUnicode32;
4958      const int *off = offsets32;
4959
4960      UErrorCode errorCode=U_ZERO_ERROR;
4961      UChar32 uniChar;
4962
4963      cnv=ucnv_open("LMBCS-1", &errorCode);
4964      if(U_FAILURE(errorCode)) {
4965           log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4966           return;
4967      }
4968      else
4969      {
4970
4971         while(source<limit) {
4972            sourceStart=source;
4973            uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4974            if(U_FAILURE(errorCode)) {
4975                  log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4976                  break;
4977            } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4978               log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4979                   uniChar, (source-sourceStart), *results, *off);
4980               break;
4981            }
4982            results++;
4983            off++;
4984         }
4985       }
4986       ucnv_close(cnv);
4987    }
4988    { /* test locale & optimization group operations: Unicode to LMBCS */
4989
4990      UErrorCode errorCode=U_ZERO_ERROR;
4991      UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4992      UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4993      UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4994      UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4995      const UChar * pUniOut = uniString;
4996      UChar * pUniIn = uniString;
4997      uint8_t lmbcsString [4];
4998      const char * pLMBCSOut = (const char *)lmbcsString;
4999      char * pLMBCSIn = (char *)lmbcsString;
5000
5001      /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5002      ucnv_fromUnicode (cnv16he,
5003                        &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
5004                        &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
5005                        NULL, 1, &errorCode);
5006
5007      if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5008      {
5009         log_err("LMBCS-16,locale=he gives unexpected translation\n");
5010      }
5011
5012      pLMBCSIn= (char *)lmbcsString;
5013      pUniOut = uniString;
5014      ucnv_fromUnicode (cnv01us,
5015                        &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
5016                        &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
5017                        NULL, 1, &errorCode);
5018
5019      if (lmbcsString[0] != 0x9F)
5020      {
5021         log_err("LMBCS-1,locale=US gives unexpected translation\n");
5022      }
5023
5024      /* single byte char from mbcs char set */
5025      lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
5026      pLMBCSOut = (const char *)lmbcsString;
5027      pUniIn = uniString;
5028      ucnv_toUnicode (cnv16jp,
5029                        &pUniIn, pUniIn + 1,
5030                        &pLMBCSOut, (pLMBCSOut + 1),
5031                        NULL, 1, &errorCode);
5032      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5033      {
5034           log_err("Unexpected results from LMBCS-16 single byte char\n");
5035      }
5036      /* convert to group 1: should be 3 bytes */
5037      pLMBCSIn = (char *)lmbcsString;
5038      pUniOut = uniString;
5039      ucnv_fromUnicode (cnv01us,
5040                        &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5041                        &pUniOut, pUniOut + 1,
5042                        NULL, 1, &errorCode);
5043      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5044         || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5045      {
5046           log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5047      }
5048      pLMBCSOut = (const char *)lmbcsString;
5049      pUniIn = uniString;
5050      ucnv_toUnicode (cnv01us,
5051                        &pUniIn, pUniIn + 1,
5052                        &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5053                        NULL, 1, &errorCode);
5054      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5055      {
5056           log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5057      }
5058      pLMBCSIn = (char *)lmbcsString;
5059      pUniOut = uniString;
5060      ucnv_fromUnicode (cnv16jp,
5061                        &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5062                        &pUniOut, pUniOut + 1,
5063                        NULL, 1, &errorCode);
5064      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5065      {
5066           log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5067      }
5068      ucnv_close(cnv16he);
5069      ucnv_close(cnv16jp);
5070      ucnv_close(cnv01us);
5071    }
5072    {
5073       /* Small source buffer testing, LMBCS -> Unicode */
5074
5075       UErrorCode errorCode=U_ZERO_ERROR;
5076
5077       const char * pSource = (const char *)pszLMBCS;
5078       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5079       int codepointCount = 0;
5080
5081       UChar Out [sizeof(pszUnicode) + 1];
5082       UChar * pOut = Out;
5083       UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
5084
5085
5086       cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5087       if(U_FAILURE(errorCode)) {
5088           log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5089           return;
5090       }
5091
5092
5093       while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5094       {
5095           ucnv_toUnicode (cnv,
5096               &pOut,
5097               OutLimit,
5098               &pSource,
5099               (pSource+1), /* claim that this is a 1- byte buffer */
5100               NULL,
5101               FALSE,    /* FALSE means there might be more chars in the next buffer */
5102               &errorCode);
5103
5104           if (U_SUCCESS (errorCode))
5105           {
5106               if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5107               {
5108                   /* we are on to the next code point: check value */
5109
5110                   if (Out[0] != pszUnicode[codepointCount]){
5111                       log_err("LMBCS->Uni result %lx should have been %lx \n",
5112                           Out[0], pszUnicode[codepointCount]);
5113                   }
5114
5115                   pOut = Out; /* reset for accumulating next code point */
5116                   codepointCount++;
5117               }
5118           }
5119           else
5120           {
5121               log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5122           }
5123       }
5124       {
5125         /* limits & surrogate error testing */
5126         char LIn [sizeof(pszLMBCS)];
5127         const char * pLIn = LIn;
5128
5129         char LOut [sizeof(pszLMBCS)];
5130         char * pLOut = LOut;
5131
5132         UChar UOut [sizeof(pszUnicode)];
5133         UChar * pUOut = UOut;
5134
5135         UChar UIn [sizeof(pszUnicode)];
5136         const UChar * pUIn = UIn;
5137
5138         int32_t off [sizeof(offsets)];
5139         UChar32 uniChar;
5140
5141         errorCode=U_ZERO_ERROR;
5142
5143         /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5144         pUIn++;
5145         ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5146         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5147         {
5148            log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5149         }
5150         pUIn--;
5151
5152         errorCode=U_ZERO_ERROR;
5153         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5154         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5155         {
5156            log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5157         }
5158         errorCode=U_ZERO_ERROR;
5159
5160         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5161         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5162         {
5163            log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5164         }
5165         errorCode=U_ZERO_ERROR;
5166
5167         /* 0 byte source request - no error, no pointer movement */
5168         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5169         ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5170         if(U_FAILURE(errorCode)) {
5171            log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5172         }
5173         if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5174         {
5175              log_err("Unexpected pointer move in 0 byte source request \n");
5176         }
5177         /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5178         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5179         if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5180         {
5181            log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5182         }
5183         if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5184         {
5185            log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5186         }
5187         errorCode = U_ZERO_ERROR;
5188
5189         /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5190
5191         pUIn = pszUnicode;
5192         ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
5193         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5194         {
5195            log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5196         }
5197
5198         errorCode = U_ZERO_ERROR;
5199
5200         pLIn = (const char *)pszLMBCS;
5201         ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5202         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5203         {
5204            log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5205         }
5206
5207         /* unpaired or chopped LMBCS surrogates */
5208
5209         /* OK high surrogate, Low surrogate is chopped */
5210         LIn [0] = (char)0x14;
5211         LIn [1] = (char)0xD8;
5212         LIn [2] = (char)0x01;
5213         LIn [3] = (char)0x14;
5214         LIn [4] = (char)0xDC;
5215         pLIn = LIn;
5216         errorCode = U_ZERO_ERROR;
5217         pUOut = UOut;
5218
5219         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5220         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5221         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5222         {
5223            log_err("Unexpected results on chopped low surrogate\n");
5224         }
5225
5226         /* chopped at surrogate boundary */
5227         LIn [0] = (char)0x14;
5228         LIn [1] = (char)0xD8;
5229         LIn [2] = (char)0x01;
5230         pLIn = LIn;
5231         errorCode = U_ZERO_ERROR;
5232         pUOut = UOut;
5233
5234         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5235         if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5236         {
5237            log_err("Unexpected results on chopped at surrogate boundary \n");
5238         }
5239
5240         /* unpaired surrogate plus valid Unichar */
5241         LIn [0] = (char)0x14;
5242         LIn [1] = (char)0xD8;
5243         LIn [2] = (char)0x01;
5244         LIn [3] = (char)0x14;
5245         LIn [4] = (char)0xC9;
5246         LIn [5] = (char)0xD0;
5247         pLIn = LIn;
5248         errorCode = U_ZERO_ERROR;
5249         pUOut = UOut;
5250
5251         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5252         if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5253         {
5254            log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5255         }
5256
5257      /* unpaired surrogate plus chopped Unichar */
5258         LIn [0] = (char)0x14;
5259         LIn [1] = (char)0xD8;
5260         LIn [2] = (char)0x01;
5261         LIn [3] = (char)0x14;
5262         LIn [4] = (char)0xC9;
5263
5264         pLIn = LIn;
5265         errorCode = U_ZERO_ERROR;
5266         pUOut = UOut;
5267
5268         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5269         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5270         {
5271            log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5272         }
5273
5274         /* unpaired surrogate plus valid non-Unichar */
5275         LIn [0] = (char)0x14;
5276         LIn [1] = (char)0xD8;
5277         LIn [2] = (char)0x01;
5278         LIn [3] = (char)0x0F;
5279         LIn [4] = (char)0x3B;
5280
5281         pLIn = LIn;
5282         errorCode = U_ZERO_ERROR;
5283         pUOut = UOut;
5284
5285         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5286         if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5287         {
5288            log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5289         }
5290
5291         /* unpaired surrogate plus chopped non-Unichar */
5292         LIn [0] = (char)0x14;
5293         LIn [1] = (char)0xD8;
5294         LIn [2] = (char)0x01;
5295         LIn [3] = (char)0x0F;
5296
5297         pLIn = LIn;
5298         errorCode = U_ZERO_ERROR;
5299         pUOut = UOut;
5300
5301         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5302
5303         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5304         {
5305            log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5306         }
5307       }
5308    }
5309   ucnv_close(cnv);  /* final cleanup */
5310}
5311
5312
5313static void TestJitterbug255()
5314{
5315    static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5316    const char *testBuffer = (const char *)testBytes;
5317    const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5318    UErrorCode status = U_ZERO_ERROR;
5319    /*UChar32 result;*/
5320    UConverter *cnv = 0;
5321
5322    cnv = ucnv_open("shift-jis", &status);
5323    if (U_FAILURE(status) || cnv == 0) {
5324        log_data_err("Failed to open the converter for SJIS.\n");
5325                return;
5326    }
5327    while (testBuffer != testEnd)
5328    {
5329        /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5330        if (U_FAILURE(status))
5331        {
5332            log_err("Failed to convert the next UChar for SJIS.\n");
5333            break;
5334        }
5335    }
5336    ucnv_close(cnv);
5337}
5338
5339static void TestEBCDICUS4XML()
5340{
5341    UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5342    static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5343    static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5344    static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5345    char target_x[] = {0x00, 0x00, 0x00, 0x00};
5346    UChar *unicodes = unicodes_x;
5347    const UChar *toUnicodeMaps = toUnicodeMaps_x;
5348    char *target = target_x;
5349    const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5350    UErrorCode status = U_ZERO_ERROR;
5351    UConverter *cnv = 0;
5352
5353    cnv = ucnv_open("ebcdic-xml-us", &status);
5354    if (U_FAILURE(status) || cnv == 0) {
5355        log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5356        return;
5357    }
5358    ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5359    if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5360        log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5361            u_errorName(status));
5362        printUSeqErr(unicodes_x, 3);
5363        printUSeqErr(toUnicodeMaps, 3);
5364    }
5365    status = U_ZERO_ERROR;
5366    ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5367    if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5368        log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5369            u_errorName(status));
5370        printSeqErr((const unsigned char*)target_x, 3);
5371        printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5372    }
5373    ucnv_close(cnv);
5374}
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5376
5377#if !UCONFIG_NO_COLLATION
5378
5379static void TestJitterbug981(){
5380    const UChar* rules;
5381    int32_t rules_length, target_cap, bytes_needed, buff_size;
5382    UErrorCode status = U_ZERO_ERROR;
5383    UConverter *utf8cnv;
5384    UCollator* myCollator;
5385    char *buff;
5386    int numNeeded=0;
5387    utf8cnv = ucnv_open ("utf8", &status);
5388    if(U_FAILURE(status)){
5389        log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5390        return;
5391    }
5392    myCollator = ucol_open("zh", &status);
5393    if(U_FAILURE(status)){
5394        log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5395        ucnv_close(utf8cnv);
5396        return;
5397    }
5398
5399    rules = ucol_getRules(myCollator, &rules_length);
5400    buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5401    buff = malloc(buff_size);
5402
5403    target_cap = 0;
5404    do {
5405        ucnv_reset(utf8cnv);
5406        status = U_ZERO_ERROR;
5407        if(target_cap >= buff_size) {
5408            log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5409            break;
5410        }
5411        bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5412            rules, rules_length, &status);
5413        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5414        if(numNeeded!=0 && numNeeded!= bytes_needed){
5415            log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5416            break;
5417        }
5418        numNeeded = bytes_needed;
5419    } while (status == U_BUFFER_OVERFLOW_ERROR);
5420    ucol_close(myCollator);
5421    ucnv_close(utf8cnv);
5422    free(buff);
5423}
5424
5425#endif
5426
5427#if !UCONFIG_NO_LEGACY_CONVERSION
5428static void TestJitterbug1293(){
5429    static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5430    char target[256];
5431    UErrorCode status = U_ZERO_ERROR;
5432    UConverter* conv=NULL;
5433    int32_t target_cap, bytes_needed, numNeeded = 0;
5434    conv = ucnv_open("shift-jis",&status);
5435    if(U_FAILURE(status)){
5436      log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5437      return;
5438    }
5439
5440    do{
5441        target_cap =0;
5442        bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5443        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5444        if(numNeeded!=0 && numNeeded!= bytes_needed){
5445          log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5446        }
5447        numNeeded = bytes_needed;
5448    } while (status == U_BUFFER_OVERFLOW_ERROR);
5449    if(U_FAILURE(status)){
5450      log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5451      return;
5452    }
5453    ucnv_close(conv);
5454}
5455#endif
5456
5457static void TestJB5275_1(){
5458
5459    static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5460                                "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5461                                /* Switch script: */
5462                                "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5463                                "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5464                                "\xEF\x40\x3B\xB3\x0A";
5465    static const UChar expected[] ={
5466            0x003b, 0x0a15, 0x000a, /* Easy characters */
5467            0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5468            0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5469            0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5470            0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5471    };
5472
5473    UErrorCode status = U_ZERO_ERROR;
5474    UConverter* conv = ucnv_open("iscii-gur", &status);
5475    UChar dest[100] = {'\0'};
5476    UChar* target = dest;
5477    UChar* targetLimit = dest+100;
5478    const char* source = data;
5479    const char* sourceLimit = data+strlen(data);
5480    const UChar* exp = expected;
5481
5482    if (U_FAILURE(status)) {
5483        log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5484        return;
5485    }
5486
5487    log_verbose("Testing switching back to default script when new line is encountered.\n");
5488    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5489    if(U_FAILURE(status)){
5490        log_err("conversion failed: %s \n", u_errorName(status));
5491    }
5492    targetLimit = target;
5493    target = dest;
5494    printUSeq(target, targetLimit-target);
5495    while(target<targetLimit){
5496        if(*exp!=*target){
5497            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5498        }
5499        target++;
5500        exp++;
5501    }
5502    ucnv_close(conv);
5503}
5504
5505static void TestJB5275(){
5506    static const char* data =
5507    /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A"  unsupported sequence \xEF\x41 */
5508    /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A"  unsupported sequence \xEF\x41  */
5509    /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A"  unsupported sequence \xEF\x41 */
5510        "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A"  /* Gurmukhi test */
5511        "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A"  /* Gujarati test */
5512        "\xEF\x48\x38\xB3\x0A"  /* Kannada test */
5513        "\xEF\x49\x39\xB3\x0A"  /* Malayalam test */
5514        "\xEF\x4A\x3A\xB3\x0A"  /* Gujarati test */
5515        "\xEF\x4B\x3B\xB3\x0A"  /* Punjabi test */
5516        /* "\xEF\x4C\x3C\xB3\x0A"  unsupported sequence \xEF\x41 */;
5517    static const UChar expected[] ={
5518        0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5519        0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A,     /* Gujarati test */
5520        0x0038, 0x0C95, 0x000A, /* Kannada test */
5521        0x0039, 0x0D15, 0x000A, /* Malayalam test */
5522        0x003A, 0x0A95, 0x000A, /* Gujarati test */
5523        0x003B, 0x0A15, 0x000A, /* Punjabi test */
5524    };
5525
5526    UErrorCode status = U_ZERO_ERROR;
5527    UConverter* conv = ucnv_open("iscii", &status);
5528    UChar dest[100] = {'\0'};
5529    UChar* target = dest;
5530    UChar* targetLimit = dest+100;
5531    const char* source = data;
5532    const char* sourceLimit = data+strlen(data);
5533    const UChar* exp = expected;
5534    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5535    if(U_FAILURE(status)){
5536        log_err("conversion failed: %s \n", u_errorName(status));
5537    }
5538    targetLimit = target;
5539    target = dest;
5540
5541    printUSeq(target, targetLimit-target);
5542
5543    while(target<targetLimit){
5544        if(*exp!=*target){
5545            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5546        }
5547        target++;
5548        exp++;
5549    }
5550    ucnv_close(conv);
5551}
5552
5553static void
5554TestIsFixedWidth() {
5555    UErrorCode status = U_ZERO_ERROR;
5556    UConverter *cnv = NULL;
5557    int32_t i;
5558
5559    const char *fixedWidth[] = {
5560            "US-ASCII",
5561            "UTF32",
5562            "ibm-5478_P100-1995"
5563    };
5564
5565    const char *notFixedWidth[] = {
5566            "GB18030",
5567            "UTF8",
5568            "windows-949-2000",
5569            "UTF16"
5570    };
5571
5572    for (i = 0; i < LENGTHOF(fixedWidth); i++) {
5573        cnv = ucnv_open(fixedWidth[i], &status);
5574        if (cnv == NULL || U_FAILURE(status)) {
5575            log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5576            continue;
5577        }
5578
5579        if (!ucnv_isFixedWidth(cnv, &status)) {
5580            log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
5581        }
5582        ucnv_close(cnv);
5583    }
5584
5585    for (i = 0; i < LENGTHOF(notFixedWidth); i++) {
5586        cnv = ucnv_open(notFixedWidth[i], &status);
5587        if (cnv == NULL || U_FAILURE(status)) {
5588            log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5589            continue;
5590        }
5591
5592        if (ucnv_isFixedWidth(cnv, &status)) {
5593            log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
5594        }
5595        ucnv_close(cnv);
5596    }
5597}
5598