1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8/*******************************************************************************
9*
10* File nucnvtst.c
11*
12* Modification History:
13*        Name                     Description
14*    Steven R. Loomis     7/8/1999      Adding input buffer test
15********************************************************************************
16*/
17#include <stdio.h>
18#include "cstring.h"
19#include "unicode/uloc.h"
20#include "unicode/ucnv.h"
21#include "unicode/ucnv_err.h"
22#include "unicode/ucnv_cb.h"
23#include "cintltst.h"
24#include "unicode/utypes.h"
25#include "unicode/ustring.h"
26#include "unicode/ucol.h"
27#include "unicode/utf16.h"
28#include "cmemory.h"
29#include "nucnvtst.h"
30
/*
 * Forward declarations for the file-local test functions and helpers.
 * Most of the Test*() functions declared here are registered with the test
 * tree in addTestNewConvert(); the preprocessor guards below control which
 * declarations are visible for a given ICU configuration.
 */

/* Helpers that drive ucnv_getNextUChar() over a byte range (defined below). */
static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
#if !UCONFIG_NO_COLLATION
static void TestJitterbug981(void);
#endif /* !UCONFIG_NO_COLLATION */
#if !UCONFIG_NO_LEGACY_CONVERSION
static void TestJitterbug1293(void);
#endif /* !UCONFIG_NO_LEGACY_CONVERSION */
/* First argument is the output chunk size, second the input chunk size. */
static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
static void TestConverterTypesAndStarters(void);
static void TestAmbiguous(void);
static void TestSignatureDetection(void);
static void TestUTF7(void);
static void TestIMAP(void);
static void TestUTF8(void);
static void TestCESU8(void);
static void TestUTF16(void);
static void TestUTF16BE(void);
static void TestUTF16LE(void);
static void TestUTF32(void);
static void TestUTF32BE(void);
static void TestUTF32LE(void);
static void TestLATIN1(void);

/* Legacy-converter tests; this guard is closed after TestHZ below. */
#if !UCONFIG_NO_LEGACY_CONVERSION
static void TestSBCS(void);
static void TestDBCS(void);
static void TestMBCS(void);
/* NOTE(review): the legacy-conversion half of this condition is already implied by the enclosing #if. */
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
static void TestICCRunout(void);
#endif /* !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO */

#ifdef U_ENABLE_GENERIC_ISO_2022
static void TestISO_2022(void);
#endif /* U_ENABLE_GENERIC_ISO_2022 */

static void TestISO_2022_JP(void);
static void TestISO_2022_JP_1(void);
static void TestISO_2022_JP_2(void);
static void TestISO_2022_KR(void);
static void TestISO_2022_KR_1(void);
static void TestISO_2022_CN(void);
#if 0
   /*
    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
    */
static void TestISO_2022_CN_EXT(void);
#endif /* 0 */
static void TestJIS(void);
static void TestHZ(void);
#endif /* !UCONFIG_NO_LEGACY_CONVERSION */

static void TestSCSU(void);

/* Second group of legacy-converter tests; this guard is closed after TestIsFixedWidth below. */
#if !UCONFIG_NO_LEGACY_CONVERSION
static void TestEBCDIC_STATEFUL(void);
static void TestGB18030(void);
static void TestLMBCS(void);
static void TestJitterbug255(void);
static void TestEBCDICUS4XML(void);
#if 0
   /*
    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
    */
static void TestJitterbug915(void);
#endif /* 0 */
static void TestISCII(void);

static void TestCoverageMBCS(void);
static void TestJitterbug2346(void);
static void TestJitterbug2411(void);
static void TestJB5275(void);
static void TestJB5275_1(void);
static void TestJitterbug6175(void);

static void TestIsFixedWidth(void);
#endif /* !UCONFIG_NO_LEGACY_CONVERSION */

static void TestInBufSizes(void);

static void TestRoundTrippingAllUTF(void);
static void TestConv(const uint16_t in[],
                     int len,
                     const char* conv,
                     const char* lang,
                     char byteArr[],
                     int byteArrLen);

/* open a converter, using test data if it begins with '@' */
static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
121
122
/* Capacity, in elements, of the scratch output and offset arrays used by the conversion helpers. */
#define NEW_MAX_BUFFER 999

/* Chunk sizes handed to the converter per call by testConvertFromU()/testConvertToU(). */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Description of the conversion currently being tested; filled in by setNuConvTestName(). */
static char     gNuConvTestName[1024];

/*
 * Minimum of x and y.
 * Both arguments are parenthesized so that operands containing operators of
 * lower precedence than '<' (e.g. `a|b`) are compared as a whole.
 * Each argument may be evaluated twice: do not pass expressions with side effects.
 */
#define nct_min(x,y)  (((x) < (y)) ? (x) : (y))
130
131static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
132{
133  if(cnv && cnv[0] == '@') {
134    return ucnv_openPackage(loadTestData(err), cnv+1, err);
135  } else {
136    return ucnv_open(cnv, err);
137  }
138}
139
/* Logs len bytes of a through log_verbose() as "{0x.. 0x.. }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
148
149static void printUSeq(const UChar* a, int len)
150{
151    int i=0;
152    log_verbose("{U+");
153    while (i<len) log_verbose("0x%04x ", a[i++]);
154    log_verbose("}\n");
155}
156
/* Writes len bytes of a to stderr as "{0x.. 0x.. }" followed by a newline. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
165
166static void printUSeqErr(const UChar* a, int len)
167{
168    int i=0;
169    fprintf(stderr, "{U+");
170    while (i<len)
171        fprintf(stderr, "0x%04x ", a[i++]);
172    fprintf(stderr,"}\n");
173}
174
175static void
176TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
177{
178     const char* s0;
179     const char* s=(char*)source;
180     const int32_t *r=results;
181     UErrorCode errorCode=U_ZERO_ERROR;
182     UChar32 c;
183
184     while(s<limit) {
185        s0=s;
186        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
187        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
188            break; /* no more significant input */
189        } else if(U_FAILURE(errorCode)) {
190            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
191            break;
192        } else if(
193            /* test the expected number of input bytes only if >=0 */
194            (*r>=0 && (int32_t)(s-s0)!=*r) ||
195            c!=*(r+1)
196        ) {
197            log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198                message, c, (s-s0), *(r+1), *r);
199            break;
200        }
201        r+=2;
202    }
203}
204
205static void
206TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
207{
208     const char* s=(char*)source;
209     UErrorCode errorCode=U_ZERO_ERROR;
210     uint32_t c;
211     c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
212     if(errorCode != expected){
213        log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
214     }
215     if(c != 0xFFFD && c != 0xffff){
216        log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
217     }
218
219}
220
221static void TestInBufSizes(void)
222{
223  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
224#if 1
225  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
226  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
227  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
228  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
229  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
230  TestNewConvertWithBufferSizes(1,1);
231  TestNewConvertWithBufferSizes(2,3);
232  TestNewConvertWithBufferSizes(3,2);
233#endif
234}
235
236static void TestOutBufSizes(void)
237{
238#if 1
239  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
240  TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
241  TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
242  TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
243  TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
244  TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
245
246#endif
247}
248
249
250void addTestNewConvert(TestNode** root)
251{
252#if !UCONFIG_NO_FILE_IO
253   addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
254   addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
255#endif
256   addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
257   addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
258   addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
259   addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
260   addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
261   addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
262
263   /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
264   addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
265   addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
266   addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
267   addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
268   addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
269   addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
270   addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
271
272#if !UCONFIG_NO_LEGACY_CONVERSION
273   addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
274#endif
275
276   addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
277
278#if !UCONFIG_NO_LEGACY_CONVERSION
279   addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
280#if !UCONFIG_NO_FILE_IO
281   addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
282   addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
283#endif
284   addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
285
286#ifdef U_ENABLE_GENERIC_ISO_2022
287   addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
288#endif
289
290   addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
291   addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
292   addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
293   // android-changed (no have ISO_2022_JP_2) -- addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
294   addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
295   addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
296   // android-changed (no ISO-2022-CN) -- addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
297   /*
298    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
299   addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
300   addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
301    */
302   addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
303#endif
304
305   addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
306
307#if !UCONFIG_NO_LEGACY_CONVERSION
308   addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
309   addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
310   addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
311   addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
312   addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
313   addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
314   addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
315#if !UCONFIG_NO_COLLATION
316   addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
317#endif
318
319   addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
320#endif
321
322
323#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
324   addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
325#endif
326
327   addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
328
329#if !UCONFIG_NO_LEGACY_CONVERSION
330   addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
331   addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
332   // android-removed (no full ISO2022 CJK tables)  -- addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
333   addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
334#endif
335}
336
337
338/* Note that this test already makes use of statics, so it's not really
339   multithread safe.
340   This convenience function lets us make the error messages actually useful.
341*/
342
343static void setNuConvTestName(const char *codepage, const char *direction)
344{
345    sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
346        codepage,
347        direction,
348        (int)gInBufferSize,
349        (int)gOutBufferSize);
350}
351
/* Result of a single testConvertFromU() / testConvertToU() run. */
typedef enum
{
  /* test was OK */
  TC_OK,
  /* Match failed - err was printed */
  TC_MISMATCH,
  /* Test failed, don't print an err because it was already printed. */
  TC_FAIL
} ETestConvertResult;
358
359/* Note: This function uses global variables and it will not do offset
360checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
361static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
362                const char *codepage, const int32_t *expectOffsets , UBool useFallback)
363{
364    UErrorCode status = U_ZERO_ERROR;
365    UConverter *conv = 0;
366    char    junkout[NEW_MAX_BUFFER]; /* FIX */
367    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
368    char *p;
369    const UChar *src;
370    char *end;
371    char *targ;
372    int32_t *offs;
373    int i;
374    int32_t   realBufferSize;
375    char *realBufferEnd;
376    const UChar *realSourceEnd;
377    const UChar *sourceLimit;
378    UBool checkOffsets = TRUE;
379    UBool doFlush;
380
381    for(i=0;i<NEW_MAX_BUFFER;i++)
382        junkout[i] = (char)0xF0;
383    for(i=0;i<NEW_MAX_BUFFER;i++)
384        junokout[i] = 0xFF;
385
386    setNuConvTestName(codepage, "FROM");
387
388    log_verbose("\n=========  %s\n", gNuConvTestName);
389
390    conv = my_ucnv_open(codepage, &status);
391
392    if(U_FAILURE(status))
393    {
394        log_data_err("Couldn't open converter %s\n",codepage);
395        return TC_FAIL;
396    }
397    if(useFallback){
398        ucnv_setFallback(conv,useFallback);
399    }
400
401    log_verbose("Converter opened..\n");
402
403    src = source;
404    targ = junkout;
405    offs = junokout;
406
407    realBufferSize = UPRV_LENGTHOF(junkout);
408    realBufferEnd = junkout + realBufferSize;
409    realSourceEnd = source + sourceLen;
410
411    if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
412        checkOffsets = FALSE;
413
414    do
415    {
416      end = nct_min(targ + gOutBufferSize, realBufferEnd);
417      sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
418
419      doFlush = (UBool)(sourceLimit == realSourceEnd);
420
421      if(targ == realBufferEnd) {
422        log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
423        return TC_FAIL;
424      }
425      log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
426
427
428      status = U_ZERO_ERROR;
429
430      ucnv_fromUnicode (conv,
431                        &targ,
432                        end,
433                        &src,
434                        sourceLimit,
435                        checkOffsets ? offs : NULL,
436                        doFlush, /* flush if we're at the end of the input data */
437                        &status);
438    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
439
440    if(U_FAILURE(status)) {
441      log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
442      return TC_FAIL;
443    }
444
445    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
446                sourceLen, targ-junkout);
447
448    if(getTestOption(VERBOSITY_OPTION))
449    {
450      char junk[9999];
451      char offset_str[9999];
452      char *ptr;
453
454      junk[0] = 0;
455      offset_str[0] = 0;
456      for(ptr = junkout;ptr<targ;ptr++) {
457        sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
458        sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
459      }
460
461      log_verbose(junk);
462      printSeq((const uint8_t *)expect, expectLen);
463      if ( checkOffsets ) {
464        log_verbose("\nOffsets:");
465        log_verbose(offset_str);
466      }
467      log_verbose("\n");
468    }
469    ucnv_close(conv);
470
471    if(expectLen != targ-junkout) {
472      log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
473      log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
474      fprintf(stderr, "Got:\n");
475      printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
476      fprintf(stderr, "Expected:\n");
477      printSeqErr((const unsigned char*)expect, expectLen);
478      return TC_MISMATCH;
479    }
480
481    if (checkOffsets && (expectOffsets != 0) ) {
482      log_verbose("comparing %d offsets..\n", targ-junkout);
483      if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
484        log_err("did not get the expected offsets. %s\n", gNuConvTestName);
485        printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
486        log_err("\n");
487        log_err("Got  :     ");
488        for(p=junkout;p<targ;p++) {
489          log_err("%d,", junokout[p-junkout]);
490        }
491        log_err("\n");
492        log_err("Expected:  ");
493        for(i=0; i<(targ-junkout); i++) {
494          log_err("%d,", expectOffsets[i]);
495        }
496        log_err("\n");
497      }
498    }
499
500    log_verbose("comparing..\n");
501    if(!memcmp(junkout, expect, expectLen)) {
502      log_verbose("Matches!\n");
503      return TC_OK;
504    } else {
505      log_err("String does not match u->%s\n", gNuConvTestName);
506      printUSeqErr(source, sourceLen);
507      fprintf(stderr, "Got:\n");
508      printSeqErr((const unsigned char *)junkout, expectLen);
509      fprintf(stderr, "Expected:\n");
510      printSeqErr((const unsigned char *)expect, expectLen);
511
512      return TC_MISMATCH;
513    }
514}
515
516/* Note: This function uses global variables and it will not do offset
517checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
518static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
519                                          const char *codepage, const int32_t *expectOffsets, UBool useFallback)
520{
521    UErrorCode status = U_ZERO_ERROR;
522    UConverter *conv = 0;
523    UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
524    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
525    const char *src;
526    const char *realSourceEnd;
527    const char *srcLimit;
528    UChar *p;
529    UChar *targ;
530    UChar *end;
531    int32_t *offs;
532    int i;
533    UBool   checkOffsets = TRUE;
534
535    int32_t   realBufferSize;
536    UChar *realBufferEnd;
537
538
539    for(i=0;i<NEW_MAX_BUFFER;i++)
540        junkout[i] = 0xFFFE;
541
542    for(i=0;i<NEW_MAX_BUFFER;i++)
543        junokout[i] = -1;
544
545    setNuConvTestName(codepage, "TO");
546
547    log_verbose("\n=========  %s\n", gNuConvTestName);
548
549    conv = my_ucnv_open(codepage, &status);
550
551    if(U_FAILURE(status))
552    {
553        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
554        return TC_FAIL;
555    }
556    if(useFallback){
557        ucnv_setFallback(conv,useFallback);
558    }
559    log_verbose("Converter opened..\n");
560
561    src = (const char *)source;
562    targ = junkout;
563    offs = junokout;
564
565    realBufferSize = UPRV_LENGTHOF(junkout);
566    realBufferEnd = junkout + realBufferSize;
567    realSourceEnd = src + sourcelen;
568
569    if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
570        checkOffsets = FALSE;
571
572    do
573    {
574        end = nct_min( targ + gOutBufferSize, realBufferEnd);
575        srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
576
577        if(targ == realBufferEnd)
578        {
579            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
580            return TC_FAIL;
581        }
582        log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
583
584        /* oldTarg = targ; */
585
586        status = U_ZERO_ERROR;
587
588        ucnv_toUnicode (conv,
589                &targ,
590                end,
591                &src,
592                srcLimit,
593                checkOffsets ? offs : NULL,
594                (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
595                &status);
596
597        /*        offs += (targ-oldTarg); */
598
599      } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
600
601    if(U_FAILURE(status))
602    {
603        log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
604        return TC_FAIL;
605    }
606
607    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
608        sourcelen, targ-junkout);
609    if(getTestOption(VERBOSITY_OPTION))
610    {
611        char junk[9999];
612        char offset_str[9999];
613        UChar *ptr;
614
615        junk[0] = 0;
616        offset_str[0] = 0;
617
618        for(ptr = junkout;ptr<targ;ptr++)
619        {
620            sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
621            sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
622        }
623
624        log_verbose(junk);
625        printUSeq(expect, expectlen);
626        if ( checkOffsets )
627          {
628            log_verbose("\nOffsets:");
629            log_verbose(offset_str);
630          }
631        log_verbose("\n");
632    }
633    ucnv_close(conv);
634
635    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
636
637    if (checkOffsets && (expectOffsets != 0))
638    {
639        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
640            log_err("did not get the expected offsets. %s\n",gNuConvTestName);
641            log_err("Got:      ");
642            for(p=junkout;p<targ;p++) {
643                log_err("%d,", junokout[p-junkout]);
644            }
645            log_err("\n");
646            log_err("Expected: ");
647            for(i=0; i<(targ-junkout); i++) {
648                log_err("%d,", expectOffsets[i]);
649            }
650            log_err("\n");
651            log_err("output:   ");
652            for(i=0; i<(targ-junkout); i++) {
653                log_err("%X,", junkout[i]);
654            }
655            log_err("\n");
656            log_err("input:    ");
657            for(i=0; i<(src-(const char *)source); i++) {
658                log_err("%X,", (unsigned char)source[i]);
659            }
660            log_err("\n");
661        }
662    }
663
664    if(!memcmp(junkout, expect, expectlen*2))
665    {
666        log_verbose("Matches!\n");
667        return TC_OK;
668    }
669    else
670    {
671        log_err("String does not match. %s\n", gNuConvTestName);
672        log_verbose("String does not match. %s\n", gNuConvTestName);
673        printf("\nGot:");
674        printUSeqErr(junkout, expectlen);
675        printf("\nExpected:");
676        printUSeqErr(expect, expectlen);
677        return TC_MISMATCH;
678    }
679}
680
681
682static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
683{
684/** test chars #1 */
685    /*  1 2 3  1Han 2Han 3Han .  */
686    static const UChar   sampleText[] =
687     { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
688    static const UChar sampleTextRoundTripUnmappable[] =
689    { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
690
691
692    static const uint8_t expectedUTF8[] =
693     { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
694    static const int32_t toUTF8Offs[] =
695     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
696    static const int32_t fmUTF8Offs[] =
697     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
698
699#ifdef U_ENABLE_GENERIC_ISO_2022
    /* Same as UTF8, but with ^[%B preceding */
701    static const const uint8_t expectedISO2022[] =
702     { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
703    static const int32_t toISO2022Offs[]     =
704     { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
705       0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
706    static const int32_t fmISO2022Offs[] =
707     { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
708#endif
709
710    /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
711    static const uint8_t expectedIBM930[] =
712     { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
713    static const int32_t toIBM930Offs[] =
714     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
715    static const int32_t fmIBM930Offs[] =
716     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
717
718    /* 1 2 3 0 h1 h2 h3 . MBCS*/
719    static const uint8_t expectedIBM943[] =
720     {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
721    static const int32_t toIBM943Offs    [] =
722     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
723    static const int32_t fmIBM943Offs[] =
724     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
725
726    /* 1 2 3 0 h1 h2 h3 . DBCS*/
727    static const uint8_t expectedIBM9027[] =
728     {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
729    static const int32_t toIBM9027Offs    [] =
730     {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
731
732     /* 1 2 3 0 <?> <?> <?> . SBCS*/
733    static const uint8_t expectedIBM920[] =
734     {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
735    static const int32_t toIBM920Offs    [] =
736     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
737
738    /* 1 2 3 0 <?> <?> <?> . SBCS*/
739    static const uint8_t expectedISO88593[] =
740     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
741    static const int32_t toISO88593Offs[]     =
742     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
743
744    /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
745    static const uint8_t expectedLATIN1[] =
746     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
747    static const int32_t toLATIN1Offs[]     =
748     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
749
750
751    /*  etc */
752    static const uint8_t expectedUTF16BE[] =
753     { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
754    static const int32_t toUTF16BEOffs[]=
755     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
756    static const int32_t fmUTF16BEOffs[] =
757     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e, 0x0010, 0x0010 };
758
759    static const uint8_t expectedUTF16LE[] =
760     { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
761    static const int32_t toUTF16LEOffs[]=
762     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
763    static const int32_t fmUTF16LEOffs[] =
764     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
765
766    static const uint8_t expectedUTF32BE[] =
767     { 0x00, 0x00, 0x00, 0x31,
768       0x00, 0x00, 0x00, 0x32,
769       0x00, 0x00, 0x00, 0x33,
770       0x00, 0x00, 0x00, 0x00,
771       0x00, 0x00, 0x4e, 0x00,
772       0x00, 0x00, 0x4e, 0x8c,
773       0x00, 0x00, 0x4e, 0x09,
774       0x00, 0x00, 0x00, 0x2e,
775       0x00, 0x02, 0x00, 0x21 };
776    static const int32_t toUTF32BEOffs[]=
777     { 0x00, 0x00, 0x00, 0x00,
778       0x01, 0x01, 0x01, 0x01,
779       0x02, 0x02, 0x02, 0x02,
780       0x03, 0x03, 0x03, 0x03,
781       0x04, 0x04, 0x04, 0x04,
782       0x05, 0x05, 0x05, 0x05,
783       0x06, 0x06, 0x06, 0x06,
784       0x07, 0x07, 0x07, 0x07,
785       0x08, 0x08, 0x08, 0x08,
786       0x08, 0x08, 0x08, 0x08 };
787    static const int32_t fmUTF32BEOffs[] =
788     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c, 0x0020, 0x0020 };
789
790    static const uint8_t expectedUTF32LE[] =
791     { 0x31, 0x00, 0x00, 0x00,
792       0x32, 0x00, 0x00, 0x00,
793       0x33, 0x00, 0x00, 0x00,
794       0x00, 0x00, 0x00, 0x00,
795       0x00, 0x4e, 0x00, 0x00,
796       0x8c, 0x4e, 0x00, 0x00,
797       0x09, 0x4e, 0x00, 0x00,
798       0x2e, 0x00, 0x00, 0x00,
799       0x21, 0x00, 0x02, 0x00 };
800    static const int32_t toUTF32LEOffs[]=
801     { 0x00, 0x00, 0x00, 0x00,
802       0x01, 0x01, 0x01, 0x01,
803       0x02, 0x02, 0x02, 0x02,
804       0x03, 0x03, 0x03, 0x03,
805       0x04, 0x04, 0x04, 0x04,
806       0x05, 0x05, 0x05, 0x05,
807       0x06, 0x06, 0x06, 0x06,
808       0x07, 0x07, 0x07, 0x07,
809       0x08, 0x08, 0x08, 0x08,
810       0x08, 0x08, 0x08, 0x08 };
811    static const int32_t fmUTF32LEOffs[] =
812     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
813
814
815
816
817/** Test chars #2 **/
818
819    /* Sahha [health],  slashed h's */
820    static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
821    static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
822
823    /* LMBCS */
824    static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
825    static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
826    static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
827    static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
828    /*********************************** START OF CODE finally *************/
829
830    gInBufferSize = insize;
831    gOutBufferSize = outsize;
832
833    log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
834
835
836    /*UTF-8*/
837    testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
838        expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
839
840    log_verbose("Test surrogate behaviour for UTF8\n");
841    {
842        static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
843        static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
844                           0xf0, 0x90, 0x90, 0x81,
845                           0xef, 0xbf, 0xbd
846        };
847        static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
848        testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
849                         expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
850
851
852    }
853
854#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
855    /*ISO-2022*/
856    testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
857        expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
858#endif
859
860    /*UTF16 LE*/
861    testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
862        expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
863    /*UTF16 BE*/
864    testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
865        expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
866    /*UTF32 LE*/
867    testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
868        expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
869    /*UTF32 BE*/
870    testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
871        expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
872
873    /*LATIN_1*/
874    testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
875        expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
876
877#if !UCONFIG_NO_LEGACY_CONVERSION
878    /*EBCDIC_STATEFUL*/
879    testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
880        expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
881
882    testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
883        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
884
885    /*MBCS*/
886
887    testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
888        expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
889    /*DBCS*/
890    testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
891        expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
892    /*SBCS*/
893    testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
894        expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
895    /*SBCS*/
896    testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
897        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
898#endif
899
900
901/****/
902
903    /*UTF-8*/
904    testConvertToU(expectedUTF8, sizeof(expectedUTF8),
905        sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,FALSE);
906#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
907    /*ISO-2022*/
908    testConvertToU(expectedISO2022, sizeof(expectedISO2022),
909        sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,FALSE);
910#endif
911
912    /*UTF16 LE*/
913    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
914        sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
915    /*UTF16 BE*/
916    testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
917        sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,FALSE);
918    /*UTF32 LE*/
919    testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
920        sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,FALSE);
921    /*UTF32 BE*/
922    testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
923        sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,FALSE);
924
925#if !UCONFIG_NO_LEGACY_CONVERSION
926    /*EBCDIC_STATEFUL*/
927    testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
928            UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,FALSE);
929    /*MBCS*/
930    testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
931            UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,FALSE);
932#endif
933
934    /* Try it again to make sure it still works */
935    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
936        sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
937
938#if !UCONFIG_NO_LEGACY_CONVERSION
939    testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
940        malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,FALSE);
941
942    testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
943        expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
944
945    /*LMBCS*/
946    testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
947        expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
948    testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
949        LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,FALSE);
950#endif
951
952    /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
953    {
954        /* encode directly set D and set O */
955        static const uint8_t utf7[] = {
956            /*
957                Hi Mom -+Jjo--!
958                A+ImIDkQ.
959                +-
960                +ZeVnLIqe-
961            */
962            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
963            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
964            0x2b, 0x2d,
965            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
966        };
967        static const UChar unicode[] = {
968            /*
969                Hi Mom -<WHITE SMILING FACE>-!
970                A<NOT IDENTICAL TO><ALPHA>.
971                +
972                [Japanese word "nihongo"]
973            */
974            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
975            0x41, 0x2262, 0x0391, 0x2e,
976            0x2b,
977            0x65e5, 0x672c, 0x8a9e
978        };
979        static const int32_t toUnicodeOffsets[] = {
980            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
981            15, 17, 19, 23,
982            24,
983            27, 29, 32
984        };
985        static const int32_t fromUnicodeOffsets[] = {
986            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
987            11, 12, 12, 12, 13, 13, 13, 13, 14,
988            15, 15,
989            16, 16, 16, 17, 17, 17, 18, 18, 18, 18
990        };
991
992        /* same but escaping set O (the exclamation mark) */
993        static const uint8_t utf7Restricted[] = {
994            /*
995                Hi Mom -+Jjo--+ACE-
996                A+ImIDkQ.
997                +-
998                +ZeVnLIqe-
999            */
1000            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1001            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1002            0x2b, 0x2d,
1003            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1004        };
1005        static const int32_t toUnicodeOffsetsR[] = {
1006            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1007            19, 21, 23, 27,
1008            28,
1009            31, 33, 36
1010        };
1011        static const int32_t fromUnicodeOffsetsR[] = {
1012            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1013            11, 12, 12, 12, 13, 13, 13, 13, 14,
1014            15, 15,
1015            16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1016        };
1017
1018        testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1019
1020        testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,FALSE);
1021
1022        testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1023
1024        testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1025    }
1026
1027    /*
1028     * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1029     * modified according to RFC 2060,
1030     * and supplemented with the one example in RFC 2060 itself.
1031     */
1032    {
1033        static const uint8_t imap[] = {
1034            /*  Hi Mom -&Jjo--!
1035                A&ImIDkQ-.
1036                &-
1037                &ZeVnLIqe-
1038                \
1039                ~peter
1040                /mail
1041                /&ZeVnLIqe-
1042                /&U,BTFw-
1043            */
1044            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1045            0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1046            0x26, 0x2d,
1047            0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1048            0x5c,
1049            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1050            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1051            0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1052            0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1053        };
1054        static const UChar unicode[] = {
1055            /*  Hi Mom -<WHITE SMILING FACE>-!
1056                A<NOT IDENTICAL TO><ALPHA>.
1057                &
1058                [Japanese word "nihongo"]
1059                \
1060                ~peter
1061                /mail
1062                /<65e5, 672c, 8a9e>
1063                /<53f0, 5317>
1064            */
1065            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1066            0x41, 0x2262, 0x0391, 0x2e,
1067            0x26,
1068            0x65e5, 0x672c, 0x8a9e,
1069            0x5c,
1070            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1071            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1072            0x2f, 0x65e5, 0x672c, 0x8a9e,
1073            0x2f, 0x53f0, 0x5317
1074        };
1075        static const int32_t toUnicodeOffsets[] = {
1076            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1077            15, 17, 19, 24,
1078            25,
1079            28, 30, 33,
1080            37,
1081            38, 39, 40, 41, 42, 43,
1082            44, 45, 46, 47, 48,
1083            49, 51, 53, 56,
1084            60, 62, 64
1085        };
1086        static const int32_t fromUnicodeOffsets[] = {
1087            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1088            11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1089            15, 15,
1090            16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1091            19,
1092            20, 21, 22, 23, 24, 25,
1093            26, 27, 28, 29, 30,
1094            31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1095            35, 36, 36, 36, 37, 37, 37, 37, 37
1096        };
1097
1098        testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1099
1100        testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1101    }
1102
1103    /* Test UTF-8 bad data handling*/
1104    {
1105        static const uint8_t utf8[]={
1106            0x61,
1107            0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1108            0x00,
1109            0x62,
1110            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1111            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1112            0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1113            0xdf, 0xbf,                     /* 7ff */
1114            0xbf,                           /* truncated tail */
1115            0xf4, 0x90, 0x80, 0x80,         /* 110000 */
1116            0x02
1117        };
1118
1119        static const uint16_t utf8Expected[]={
1120            0x0061,
1121            0xfffd, 0xfffd, 0xfffd, 0xfffd,
1122            0x0000,
1123            0x0062,
1124            0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1125            0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1126            0xdbff, 0xdfff,
1127            0x07ff,
1128            0xfffd,
1129            0xfffd, 0xfffd, 0xfffd, 0xfffd,
1130            0x0002
1131        };
1132
1133        static const int32_t utf8Offsets[]={
1134            0,
1135            1, 2, 3, 4,
1136            5,
1137            6,
1138            7, 8, 9, 10, 11,
1139            12, 13, 14, 15, 16,
1140            17, 17,
1141            21,
1142            23,
1143            24, 25, 26, 27,
1144            28
1145        };
1146        testConvertToU(utf8, sizeof(utf8),
1147                       utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,FALSE);
1148
1149    }
1150
1151    /* Test UTF-32BE bad data handling*/
1152    {
1153        static const uint8_t utf32[]={
1154            0x00, 0x00, 0x00, 0x61,
1155            0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1156            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1157            0x00, 0x00, 0x00, 0x62,
1158            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1159            0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1160            0x00, 0x00, 0x01, 0x62,
1161            0x00, 0x00, 0x02, 0x62
1162        };
1163        static const uint16_t utf32Expected[]={
1164            0x0061,
1165            0xfffd,         /* 0x110000 out of range */
1166            0xDBFF,         /* 0x10FFFF in range */
1167            0xDFFF,
1168            0x0062,
1169            0xfffd,         /* 0xffffffff out of range */
1170            0xfffd,         /* 0x7fffffff out of range */
1171            0x0162,
1172            0x0262
1173        };
1174        static const int32_t utf32Offsets[]={
1175            0, 4, 8, 8, 12, 16, 20, 24, 28
1176        };
1177        static const uint8_t utf32ExpectedBack[]={
1178            0x00, 0x00, 0x00, 0x61,
1179            0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1180            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1181            0x00, 0x00, 0x00, 0x62,
1182            0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1183            0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1184            0x00, 0x00, 0x01, 0x62,
1185            0x00, 0x00, 0x02, 0x62
1186        };
1187        static const int32_t utf32OffsetsBack[]={
1188            0,0,0,0,
1189            1,1,1,1,
1190            2,2,2,2,
1191            4,4,4,4,
1192            5,5,5,5,
1193            6,6,6,6,
1194            7,7,7,7,
1195            8,8,8,8
1196        };
1197
1198        testConvertToU(utf32, sizeof(utf32),
1199                       utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,FALSE);
1200        testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1201            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1202    }
1203
1204    /* Test UTF-32LE bad data handling*/
1205    {
1206        static const uint8_t utf32[]={
1207            0x61, 0x00, 0x00, 0x00,
1208            0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1209            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1210            0x62, 0x00, 0x00, 0x00,
1211            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1212            0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1213            0x62, 0x01, 0x00, 0x00,
1214            0x62, 0x02, 0x00, 0x00,
1215        };
1216
1217        static const uint16_t utf32Expected[]={
1218            0x0061,
1219            0xfffd,         /* 0x110000 out of range */
1220            0xDBFF,         /* 0x10FFFF in range */
1221            0xDFFF,
1222            0x0062,
1223            0xfffd,         /* 0xffffffff out of range */
1224            0xfffd,         /* 0x7fffffff out of range */
1225            0x0162,
1226            0x0262
1227        };
1228        static const int32_t utf32Offsets[]={
1229            0, 4, 8, 8, 12, 16, 20, 24, 28
1230        };
1231        static const uint8_t utf32ExpectedBack[]={
1232            0x61, 0x00, 0x00, 0x00,
1233            0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1234            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1235            0x62, 0x00, 0x00, 0x00,
1236            0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1237            0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1238            0x62, 0x01, 0x00, 0x00,
1239            0x62, 0x02, 0x00, 0x00
1240        };
1241        static const int32_t utf32OffsetsBack[]={
1242            0,0,0,0,
1243            1,1,1,1,
1244            2,2,2,2,
1245            4,4,4,4,
1246            5,5,5,5,
1247            6,6,6,6,
1248            7,7,7,7,
1249            8,8,8,8
1250        };
1251        testConvertToU(utf32, sizeof(utf32),
1252            utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,FALSE );
1253        testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1254            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1255    }
1256}
1257
1258static void TestCoverageMBCS(){
1259#if 0
1260    UErrorCode status = U_ZERO_ERROR;
1261    const char *directory = loadTestData(&status);
1262    char* tdpath = NULL;
1263    char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1264    int len = strlen(directory);
1265    char* index=NULL;
1266
1267    tdpath = (char*) malloc(sizeof(char) * (len * 2));
1268    uprv_strcpy(saveDirectory,u_getDataDirectory());
1269    log_verbose("Retrieved data directory %s \n",saveDirectory);
1270    uprv_strcpy(tdpath,directory);
1271    index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1272
1273    if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1274            *(index+1)=0;
1275    }
1276    u_setDataDirectory(tdpath);
1277    log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1278#endif
1279
1280    /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1281      which is test file for MBCS conversion with single-byte codepage data.*/
1282    {
1283
1284        /* MBCS with single byte codepage data test1.ucm*/
1285        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1286        const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1287        int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1288
1289        /*from Unicode*/
1290        testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1291            expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1292    }
1293
1294    /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1295      which is test file for MBCS conversion with three-byte codepage data.*/
1296    {
1297
1298        /* MBCS with three byte codepage data test3.ucm*/
1299        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1300        const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1301        int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1302
1303        const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1304        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1305        int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1306
1307        /*from Unicode*/
1308        testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1309            expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1310
1311        /*to Unicode*/
1312        testConvertToU(test3input, sizeof(test3input),
1313            expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,FALSE);
1314
1315    }
1316
1317    /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1318      which is test file for MBCS conversion with four-byte codepage data.*/
1319    {
1320
1321        /* MBCS with three byte codepage data test4.ucm*/
1322        static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1323        static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1324        static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1325
1326        static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1327        static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1328        static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1329
1330        /*from Unicode*/
1331        testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1332            expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1333
1334        /*to Unicode*/
1335        testConvertToU(test4input, sizeof(test4input),
1336            expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,FALSE );
1337
1338    }
1339#if 0
1340    free(tdpath);
1341    /* restore the original data directory */
1342    log_verbose("Setting the data directory to %s \n", saveDirectory);
1343    u_setDataDirectory(saveDirectory);
1344    free(saveDirectory);
1345#endif
1346
1347}
1348
1349static void TestConverterType(const char *convName, UConverterType convType) {
1350    UConverter* myConverter;
1351    UErrorCode err = U_ZERO_ERROR;
1352
1353    myConverter = my_ucnv_open(convName, &err);
1354
1355    if (U_FAILURE(err)) {
1356        log_data_err("Failed to create an %s converter\n", convName);
1357        return;
1358    }
1359    else
1360    {
1361        if (ucnv_getType(myConverter)!=convType) {
1362            log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1363                convName, convType);
1364        }
1365        else {
1366            log_verbose("ucnv_getType %s ok\n", convName);
1367        }
1368    }
1369    ucnv_close(myConverter);
1370}
1371
1372static void TestConverterTypesAndStarters()
1373{
1374#if !UCONFIG_NO_LEGACY_CONVERSION
1375    UConverter* myConverter;
1376    UErrorCode err = U_ZERO_ERROR;
1377    UBool mystarters[256];
1378
1379/*    const UBool expectedKSCstarters[256] = {
1380        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1381        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1386        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1387        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1388        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1389        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1390        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1391        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1392        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1393        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1394        FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1395        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1396        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1397        TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1398        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1399        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1400        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1401        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1402        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1403        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1404        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1405        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1406
1407
1408    log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1409
1410    myConverter = ucnv_open("ksc", &err);
1411    if (U_FAILURE(err)) {
1412      log_data_err("Failed to create an ibm-ksc converter\n");
1413      return;
1414    }
1415    else
1416    {
1417        if (ucnv_getType(myConverter)!=UCNV_MBCS)
1418            log_err("ucnv_getType Failed for ibm-949\n");
1419        else
1420            log_verbose("ucnv_getType ibm-949 ok\n");
1421
1422        if(myConverter!=NULL)
1423            ucnv_getStarters(myConverter, mystarters, &err);
1424
1425        /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1426          log_err("Failed ucnv_getStarters for ksc\n");
1427          else
1428          log_verbose("ucnv_getStarters ok\n");*/
1429
1430    }
1431    ucnv_close(myConverter);
1432
1433    TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1434    TestConverterType("ibm-878", UCNV_SBCS);
1435#endif
1436
1437    TestConverterType("iso-8859-1", UCNV_LATIN_1);
1438
1439    TestConverterType("ibm-1208", UCNV_UTF8);
1440
1441    TestConverterType("utf-8", UCNV_UTF8);
1442    TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1443    TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1444    TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1445    TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1446
1447#if !UCONFIG_NO_LEGACY_CONVERSION
1448
1449#if defined(U_ENABLE_GENERIC_ISO_2022)
1450    TestConverterType("iso-2022", UCNV_ISO_2022);
1451#endif
1452
1453    TestConverterType("hz", UCNV_HZ);
1454#endif
1455
1456    TestConverterType("scsu", UCNV_SCSU);
1457
1458#if !UCONFIG_NO_LEGACY_CONVERSION
1459    TestConverterType("x-iscii-de", UCNV_ISCII);
1460#endif
1461
1462    TestConverterType("ascii", UCNV_US_ASCII);
1463    TestConverterType("utf-7", UCNV_UTF7);
1464    TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1465    TestConverterType("bocu-1", UCNV_BOCU1);
1466}
1467
1468static void
1469TestAmbiguousConverter(UConverter *cnv) {
1470    static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1471    UChar outUnicode[20]={ 0, 0, 0, 0 };
1472
1473    const char *s;
1474    UChar *u;
1475    UErrorCode errorCode;
1476    UBool isAmbiguous;
1477
1478    /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1479    errorCode=U_ZERO_ERROR;
1480    s=inBytes;
1481    u=outUnicode;
1482    ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1483    if(U_FAILURE(errorCode)) {
1484        /* we do not care about general failures in this test; the input may just not be mappable */
1485        return;
1486    }
1487
1488    if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1489        /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1490        /* There are some encodings that are partially ASCII based,
1491        like the ISO-7 and GSM series of codepages, which we ignore. */
1492        return;
1493    }
1494
1495    isAmbiguous=ucnv_isAmbiguous(cnv);
1496
1497    /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1498    if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1499        log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1500            ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1501        return;
1502    }
1503
1504    if(outUnicode[2]!=0x5c) {
1505        /* needs fixup, fix it */
1506        ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1507        if(outUnicode[2]!=0x5c) {
1508            /* the fix failed */
1509            log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1510            return;
1511        }
1512    }
1513}
1514
1515static void TestAmbiguous()
1516{
1517    UErrorCode status = U_ZERO_ERROR;
1518    UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1519    static const char target[] = {
1520        /* "\\usr\\local\\share\\data\\icutest.txt" */
1521        0x5c, 0x75, 0x73, 0x72,
1522        0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1523        0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1524        0x5c, 0x64, 0x61, 0x74, 0x61,
1525        0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1526        0
1527    };
1528    UChar asciiResult[200], sjisResult[200];
1529    int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1530    const char *name;
1531
1532    /* enumerate all converters */
1533    status=U_ZERO_ERROR;
1534    for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1535        cnv=ucnv_open(name, &status);
1536        if(U_SUCCESS(status)) {
1537            /* BEGIN android-changed. To save space Android does not build full ISO-2022-CN CJK tables. */
1538            const char* cnvName = ucnv_getName(cnv, &status);
1539            if (strlen(cnvName) < 8 ||
1540                strncmp(cnvName, "ISO_2022_CN", 8) != 0) {
1541            TestAmbiguousConverter(cnv);
1542            }
1543            /* END android-changed */
1544            ucnv_close(cnv);
1545        } else {
1546            log_err("error: unable to open available converter \"%s\"\n", name);
1547            status=U_ZERO_ERROR;
1548        }
1549    }
1550
1551#if !UCONFIG_NO_LEGACY_CONVERSION
1552    sjis_cnv = ucnv_open("ibm-943", &status);
1553    if (U_FAILURE(status))
1554    {
1555        log_data_err("Failed to create a SJIS converter\n");
1556        return;
1557    }
1558    ascii_cnv = ucnv_open("LATIN-1", &status);
1559    if (U_FAILURE(status))
1560    {
1561        log_data_err("Failed to create a LATIN-1 converter\n");
1562        ucnv_close(sjis_cnv);
1563        return;
1564    }
1565    /* convert target from SJIS to Unicode */
1566    sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
1567    if (U_FAILURE(status))
1568    {
1569        log_err("Failed to convert the SJIS string.\n");
1570        ucnv_close(sjis_cnv);
1571        ucnv_close(ascii_cnv);
1572        return;
1573    }
1574    /* convert target from Latin-1 to Unicode */
1575    /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
1576    if (U_FAILURE(status))
1577    {
1578        log_err("Failed to convert the Latin-1 string.\n");
1579        ucnv_close(sjis_cnv);
1580        ucnv_close(ascii_cnv);
1581        return;
1582    }
1583    if (!ucnv_isAmbiguous(sjis_cnv))
1584    {
1585        log_err("SJIS converter should contain ambiguous character mappings.\n");
1586        ucnv_close(sjis_cnv);
1587        ucnv_close(ascii_cnv);
1588        return;
1589    }
1590    if (u_strcmp(sjisResult, asciiResult) == 0)
1591    {
1592        log_err("File separators for SJIS don't need to be fixed.\n");
1593    }
1594    ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1595    if (u_strcmp(sjisResult, asciiResult) != 0)
1596    {
1597        log_err("Fixing file separator for SJIS failed.\n");
1598    }
1599    ucnv_close(sjis_cnv);
1600    ucnv_close(ascii_cnv);
1601#endif
1602}
1603
1604static void
1605TestSignatureDetection(){
1606    /* with null terminated strings */
1607    {
1608        static const char* data[] = {
1609                "\xFE\xFF\x00\x00",     /* UTF-16BE */
1610                "\xFF\xFE\x00\x00",     /* UTF-16LE */
1611                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1612                "\x0E\xFE\xFF\x00",     /* SCSU     */
1613
1614                "\xFE\xFF",             /* UTF-16BE */
1615                "\xFF\xFE",             /* UTF-16LE */
1616                "\xEF\xBB\xBF",         /* UTF-8    */
1617                "\x0E\xFE\xFF",         /* SCSU     */
1618
1619                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1620                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1621                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1622                "\x0E\xFE\xFF\x41",     /* SCSU     */
1623
1624                "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1625                "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1626                "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1627                "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1628                "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1629
1630                "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1631        };
1632        static const char* expected[] = {
1633                "UTF-16BE",
1634                "UTF-16LE",
1635                "UTF-8",
1636                "SCSU",
1637
1638                "UTF-16BE",
1639                "UTF-16LE",
1640                "UTF-8",
1641                "SCSU",
1642
1643                "UTF-16BE",
1644                "UTF-16LE",
1645                "UTF-8",
1646                "SCSU",
1647
1648                "UTF-7",
1649                "UTF-7",
1650                "UTF-7",
1651                "UTF-7",
1652                "UTF-7",
1653                "UTF-EBCDIC"
1654        };
1655        static const int32_t expectedLength[] ={
1656            2,
1657            2,
1658            3,
1659            3,
1660
1661            2,
1662            2,
1663            3,
1664            3,
1665
1666            2,
1667            2,
1668            3,
1669            3,
1670
1671            5,
1672            4,
1673            4,
1674            4,
1675            4,
1676            4
1677        };
1678        int i=0;
1679        UErrorCode err;
1680        int32_t signatureLength = -1;
1681        const char* source = NULL;
1682        const char* enc = NULL;
1683        for( ; i<UPRV_LENGTHOF(data); i++){
1684            err = U_ZERO_ERROR;
1685            source = data[i];
1686            enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1687            if(U_FAILURE(err)){
1688                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1689                continue;
1690            }
1691            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1692                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1693                continue;
1694            }
1695            if(signatureLength != expectedLength[i]){
1696                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1697            }
1698        }
1699    }
1700    {
1701        static const char* data[] = {
1702                "\xFE\xFF\x00",         /* UTF-16BE */
1703                "\xFF\xFE\x00",         /* UTF-16LE */
1704                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1705                "\x0E\xFE\xFF\x00",     /* SCSU     */
1706                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1707                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1708                "\xFE\xFF",             /* UTF-16BE */
1709                "\xFF\xFE",             /* UTF-16LE */
1710                "\xEF\xBB\xBF",         /* UTF-8    */
1711                "\x0E\xFE\xFF",         /* SCSU     */
1712                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1713                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1714                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1715                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1716                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1717                "\x0E\xFE\xFF\x41",     /* SCSU     */
1718                "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1719                "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1720                "\xFB\xEE\x28",         /* BOCU-1   */
1721                "\xFF\x41\x42"          /* NULL     */
1722        };
1723        static const int len[] = {
1724            3,
1725            3,
1726            4,
1727            4,
1728            4,
1729            4,
1730            2,
1731            2,
1732            3,
1733            3,
1734            4,
1735            4,
1736            4,
1737            4,
1738            4,
1739            4,
1740            5,
1741            5,
1742            3,
1743            3
1744        };
1745
1746        static const char* expected[] = {
1747                "UTF-16BE",
1748                "UTF-16LE",
1749                "UTF-8",
1750                "SCSU",
1751                "UTF-32BE",
1752                "UTF-32LE",
1753                "UTF-16BE",
1754                "UTF-16LE",
1755                "UTF-8",
1756                "SCSU",
1757                "UTF-32BE",
1758                "UTF-32LE",
1759                "UTF-16BE",
1760                "UTF-16LE",
1761                "UTF-8",
1762                "SCSU",
1763                "UTF-32BE",
1764                "UTF-32LE",
1765                "BOCU-1",
1766                NULL
1767        };
1768        static const int32_t expectedLength[] ={
1769            2,
1770            2,
1771            3,
1772            3,
1773            4,
1774            4,
1775            2,
1776            2,
1777            3,
1778            3,
1779            4,
1780            4,
1781            2,
1782            2,
1783            3,
1784            3,
1785            4,
1786            4,
1787            3,
1788            0
1789        };
1790        int i=0;
1791        UErrorCode err;
1792        int32_t signatureLength = -1;
1793        int32_t sourceLength=-1;
1794        const char* source = NULL;
1795        const char* enc = NULL;
1796        for( ; i<UPRV_LENGTHOF(data); i++){
1797            err = U_ZERO_ERROR;
1798            source = data[i];
1799            sourceLength = len[i];
1800            enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1801            if(U_FAILURE(err)){
1802                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1803                continue;
1804            }
1805            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1806                if(expected[i] !=NULL){
1807                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1808                 continue;
1809                }
1810            }
1811            if(signatureLength != expectedLength[i]){
1812                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1813            }
1814        }
1815    }
1816}
1817
1818static void TestUTF7() {
1819    /* test input */
1820    static const uint8_t in[]={
1821        /* H - +Jjo- - ! +- +2AHcAQ */
1822        0x48,
1823        0x2d,
1824        0x2b, 0x4a, 0x6a, 0x6f,
1825        0x2d, 0x2d,
1826        0x21,
1827        0x2b, 0x2d,
1828        0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1829    };
1830
1831    /* expected test results */
1832    static const int32_t results[]={
1833        /* number of bytes read, code point */
1834        1, 0x48,
1835        1, 0x2d,
1836        4, 0x263a, /* <WHITE SMILING FACE> */
1837        2, 0x2d,
1838        1, 0x21,
1839        2, 0x2b,
1840        7, 0x10401
1841    };
1842
1843    const char *cnvName;
1844    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1845    UErrorCode errorCode=U_ZERO_ERROR;
1846    UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1847    if(U_FAILURE(errorCode)) {
1848        log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
1849        return;
1850    }
1851    TestNextUChar(cnv, source, limit, results, "UTF-7");
1852    /* Test the condition when source >= sourceLimit */
1853    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1854    cnvName = ucnv_getName(cnv, &errorCode);
1855    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1856        log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1857    }
1858    ucnv_close(cnv);
1859}
1860
1861static void TestIMAP() {
1862    /* test input */
1863    static const uint8_t in[]={
1864        /* H - &Jjo- - ! &- &2AHcAQ- \ */
1865        0x48,
1866        0x2d,
1867        0x26, 0x4a, 0x6a, 0x6f,
1868        0x2d, 0x2d,
1869        0x21,
1870        0x26, 0x2d,
1871        0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1872    };
1873
1874    /* expected test results */
1875    static const int32_t results[]={
1876        /* number of bytes read, code point */
1877        1, 0x48,
1878        1, 0x2d,
1879        4, 0x263a, /* <WHITE SMILING FACE> */
1880        2, 0x2d,
1881        1, 0x21,
1882        2, 0x26,
1883        7, 0x10401
1884    };
1885
1886    const char *cnvName;
1887    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1888    UErrorCode errorCode=U_ZERO_ERROR;
1889    UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1890    if(U_FAILURE(errorCode)) {
1891        log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
1892        return;
1893    }
1894    TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1895    /* Test the condition when source >= sourceLimit */
1896    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1897    cnvName = ucnv_getName(cnv, &errorCode);
1898    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1899        log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1900    }
1901    ucnv_close(cnv);
1902}
1903
1904static void TestUTF8() {
1905    /* test input */
1906    static const uint8_t in[]={
1907        0x61,
1908        0xc2, 0x80,
1909        0xe0, 0xa0, 0x80,
1910        0xf0, 0x90, 0x80, 0x80,
1911        0xf4, 0x84, 0x8c, 0xa1,
1912        0xf0, 0x90, 0x90, 0x81
1913    };
1914
1915    /* expected test results */
1916    static const int32_t results[]={
1917        /* number of bytes read, code point */
1918        1, 0x61,
1919        2, 0x80,
1920        3, 0x800,
1921        4, 0x10000,
1922        4, 0x104321,
1923        4, 0x10401
1924    };
1925
1926    /* error test input */
1927    static const uint8_t in2[]={
1928        0x61,
1929        0xc0, 0x80,                     /* illegal non-shortest form */
1930        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1931        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1932        0xc0, 0xc0,                     /* illegal trail byte */
1933        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1934        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1935        0xfe,                           /* illegal byte altogether */
1936        0x62
1937    };
1938
1939    /* expected error test results */
1940    static const int32_t results2[]={
1941        /* number of bytes read, code point */
1942        1, 0x61,
1943        22, 0x62
1944    };
1945
1946    UConverterToUCallback cb;
1947    const void *p;
1948
1949    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1950    UErrorCode errorCode=U_ZERO_ERROR;
1951    UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1952    if(U_FAILURE(errorCode)) {
1953        log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1954        return;
1955    }
1956    TestNextUChar(cnv, source, limit, results, "UTF-8");
1957    /* Test the condition when source >= sourceLimit */
1958    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1959
1960    /* test error behavior with a skip callback */
1961    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1962    source=(const char *)in2;
1963    limit=(const char *)(in2+sizeof(in2));
1964    TestNextUChar(cnv, source, limit, results2, "UTF-8");
1965
1966    ucnv_close(cnv);
1967}
1968
1969static void TestCESU8() {
1970    /* test input */
1971    static const uint8_t in[]={
1972        0x61,
1973        0xc2, 0x80,
1974        0xe0, 0xa0, 0x80,
1975        0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1976        0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1977        0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1978        0xef, 0xbf, 0xbc
1979    };
1980
1981    /* expected test results */
1982    static const int32_t results[]={
1983        /* number of bytes read, code point */
1984        1, 0x61,
1985        2, 0x80,
1986        3, 0x800,
1987        6, 0x10000,
1988        3, 0xdc01,
1989        -1,0xd802,  /* may read 3 or 6 bytes */
1990        -1,0x10ffff,/* may read 0 or 3 bytes */
1991        3, 0xfffc
1992    };
1993
1994    /* error test input */
1995    static const uint8_t in2[]={
1996        0x61,
1997        0xc0, 0x80,                     /* illegal non-shortest form */
1998        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1999        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
2000        0xc0, 0xc0,                     /* illegal trail byte */
2001        0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
2002        0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
2003        0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
2004        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
2005        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
2006        0xfe,                           /* illegal byte altogether */
2007        0x62
2008    };
2009
2010    /* expected error test results */
2011    static const int32_t results2[]={
2012        /* number of bytes read, code point */
2013        1, 0x61,
2014        34, 0x62
2015    };
2016
2017    UConverterToUCallback cb;
2018    const void *p;
2019
2020    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2021    UErrorCode errorCode=U_ZERO_ERROR;
2022    UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2023    if(U_FAILURE(errorCode)) {
2024        log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2025        return;
2026    }
2027    TestNextUChar(cnv, source, limit, results, "CESU-8");
2028    /* Test the condition when source >= sourceLimit */
2029    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2030
2031    /* test error behavior with a skip callback */
2032    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2033    source=(const char *)in2;
2034    limit=(const char *)(in2+sizeof(in2));
2035    TestNextUChar(cnv, source, limit, results2, "CESU-8");
2036
2037    ucnv_close(cnv);
2038}
2039
2040static void TestUTF16() {
2041    /* test input */
2042    static const uint8_t in1[]={
2043        0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2044    };
2045    static const uint8_t in2[]={
2046        0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2047    };
2048    static const uint8_t in3[]={
2049        0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2050    };
2051
2052    /* expected test results */
2053    static const int32_t results1[]={
2054        /* number of bytes read, code point */
2055        4, 0x4e00,
2056        2, 0xfeff
2057    };
2058    static const int32_t results2[]={
2059        /* number of bytes read, code point */
2060        4, 0x004e,
2061        2, 0xfffe
2062    };
2063    static const int32_t results3[]={
2064        /* number of bytes read, code point */
2065        2, 0xfefe,
2066        2, 0x4e00,
2067        2, 0xfeff,
2068        4, 0x20001
2069    };
2070
2071    const char *source, *limit;
2072
2073    UErrorCode errorCode=U_ZERO_ERROR;
2074    UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2075    if(U_FAILURE(errorCode)) {
2076        log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2077        return;
2078    }
2079
2080    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2081    TestNextUChar(cnv, source, limit, results1, "UTF-16");
2082
2083    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2084    ucnv_resetToUnicode(cnv);
2085    TestNextUChar(cnv, source, limit, results2, "UTF-16");
2086
2087    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2088    ucnv_resetToUnicode(cnv);
2089    TestNextUChar(cnv, source, limit, results3, "UTF-16");
2090
2091    /* Test the condition when source >= sourceLimit */
2092    ucnv_resetToUnicode(cnv);
2093    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2094
2095    ucnv_close(cnv);
2096}
2097
2098static void TestUTF16BE() {
2099    /* test input */
2100    static const uint8_t in[]={
2101        0x00, 0x61,
2102        0x00, 0xc0,
2103        0x00, 0x31,
2104        0x00, 0xf4,
2105        0xce, 0xfe,
2106        0xd8, 0x01, 0xdc, 0x01
2107    };
2108
2109    /* expected test results */
2110    static const int32_t results[]={
2111        /* number of bytes read, code point */
2112        2, 0x61,
2113        2, 0xc0,
2114        2, 0x31,
2115        2, 0xf4,
2116        2, 0xcefe,
2117        4, 0x10401
2118    };
2119
2120    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2121    UErrorCode errorCode=U_ZERO_ERROR;
2122    UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2123    if(U_FAILURE(errorCode)) {
2124        log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2125        return;
2126    }
2127    TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2128    /* Test the condition when source >= sourceLimit */
2129    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2130    /*Test for the condition where there is an invalid character*/
2131    {
2132        static const uint8_t source2[]={0x61};
2133        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2134        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2135    }
2136#if 0
2137    /*
2138     * Test disabled because currently the UTF-16BE/LE converters are supposed
2139     * to not set errors for unpaired surrogates.
2140     * This may change with
2141     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2142     */
2143
2144    /*Test for the condition where there is a surrogate pair*/
2145    {
2146        const uint8_t source2[]={0xd8, 0x01};
2147        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2148    }
2149#endif
2150    ucnv_close(cnv);
2151}
2152
2153static void
2154TestUTF16LE() {
2155    /* test input */
2156    static const uint8_t in[]={
2157        0x61, 0x00,
2158        0x31, 0x00,
2159        0x4e, 0x2e,
2160        0x4e, 0x00,
2161        0x01, 0xd8, 0x01, 0xdc
2162    };
2163
2164    /* expected test results */
2165    static const int32_t results[]={
2166        /* number of bytes read, code point */
2167        2, 0x61,
2168        2, 0x31,
2169        2, 0x2e4e,
2170        2, 0x4e,
2171        4, 0x10401
2172    };
2173
2174    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2175    UErrorCode errorCode=U_ZERO_ERROR;
2176    UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2177    if(U_FAILURE(errorCode)) {
2178        log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2179        return;
2180    }
2181    TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2182    /* Test the condition when source >= sourceLimit */
2183    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2184    /*Test for the condition where there is an invalid character*/
2185    {
2186        static const uint8_t source2[]={0x61};
2187        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2188        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2189    }
2190#if 0
2191    /*
2192     * Test disabled because currently the UTF-16BE/LE converters are supposed
2193     * to not set errors for unpaired surrogates.
2194     * This may change with
2195     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2196     */
2197
2198    /*Test for the condition where there is a surrogate character*/
2199    {
2200        static const uint8_t source2[]={0x01, 0xd8};
2201        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2202    }
2203#endif
2204
2205    ucnv_close(cnv);
2206}
2207
2208static void TestUTF32() {
2209    /* test input */
2210    static const uint8_t in1[]={
2211        0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2212    };
2213    static const uint8_t in2[]={
2214        0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2215    };
2216    static const uint8_t in3[]={
2217        0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2218    };
2219
2220    /* expected test results */
2221    static const int32_t results1[]={
2222        /* number of bytes read, code point */
2223        8, 0x100f00,
2224        4, 0xfeff
2225    };
2226    static const int32_t results2[]={
2227        /* number of bytes read, code point */
2228        8, 0x0f1000,
2229        4, 0xfffe
2230    };
2231    static const int32_t results3[]={
2232        /* number of bytes read, code point */
2233        4, 0xfefe,
2234        4, 0x100f00,
2235        4, 0xfffd, /* unmatched surrogate */
2236        4, 0xfffd  /* unmatched surrogate */
2237    };
2238
2239    const char *source, *limit;
2240
2241    UErrorCode errorCode=U_ZERO_ERROR;
2242    UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2243    if(U_FAILURE(errorCode)) {
2244        log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2245        return;
2246    }
2247
2248    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2249    TestNextUChar(cnv, source, limit, results1, "UTF-32");
2250
2251    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2252    ucnv_resetToUnicode(cnv);
2253    TestNextUChar(cnv, source, limit, results2, "UTF-32");
2254
2255    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2256    ucnv_resetToUnicode(cnv);
2257    TestNextUChar(cnv, source, limit, results3, "UTF-32");
2258
2259    /* Test the condition when source >= sourceLimit */
2260    ucnv_resetToUnicode(cnv);
2261    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2262
2263    ucnv_close(cnv);
2264}
2265
2266static void
2267TestUTF32BE() {
2268    /* test input */
2269    static const uint8_t in[]={
2270        0x00, 0x00, 0x00, 0x61,
2271        0x00, 0x00, 0x30, 0x61,
2272        0x00, 0x00, 0xdc, 0x00,
2273        0x00, 0x00, 0xd8, 0x00,
2274        0x00, 0x00, 0xdf, 0xff,
2275        0x00, 0x00, 0xff, 0xfe,
2276        0x00, 0x10, 0xab, 0xcd,
2277        0x00, 0x10, 0xff, 0xff
2278    };
2279
2280    /* expected test results */
2281    static const int32_t results[]={
2282        /* number of bytes read, code point */
2283        4, 0x61,
2284        4, 0x3061,
2285        4, 0xfffd,
2286        4, 0xfffd,
2287        4, 0xfffd,
2288        4, 0xfffe,
2289        4, 0x10abcd,
2290        4, 0x10ffff
2291    };
2292
2293    /* error test input */
2294    static const uint8_t in2[]={
2295        0x00, 0x00, 0x00, 0x61,
2296        0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2297        0x00, 0x00, 0x00, 0x62,
2298        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2299        0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2300        0x00, 0x00, 0x01, 0x62,
2301        0x00, 0x00, 0x02, 0x62
2302    };
2303
2304    /* expected error test results */
2305    static const int32_t results2[]={
2306        /* number of bytes read, code point */
2307        4,  0x61,
2308        8,  0x62,
2309        12, 0x162,
2310        4,  0x262
2311    };
2312
2313    UConverterToUCallback cb;
2314    const void *p;
2315
2316    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2317    UErrorCode errorCode=U_ZERO_ERROR;
2318    UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2319    if(U_FAILURE(errorCode)) {
2320        log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2321        return;
2322    }
2323    TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2324
2325    /* Test the condition when source >= sourceLimit */
2326    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2327
2328    /* test error behavior with a skip callback */
2329    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2330    source=(const char *)in2;
2331    limit=(const char *)(in2+sizeof(in2));
2332    TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2333
2334    ucnv_close(cnv);
2335}
2336
2337static void
2338TestUTF32LE() {
2339    /* test input */
2340    static const uint8_t in[]={
2341        0x61, 0x00, 0x00, 0x00,
2342        0x61, 0x30, 0x00, 0x00,
2343        0x00, 0xdc, 0x00, 0x00,
2344        0x00, 0xd8, 0x00, 0x00,
2345        0xff, 0xdf, 0x00, 0x00,
2346        0xfe, 0xff, 0x00, 0x00,
2347        0xcd, 0xab, 0x10, 0x00,
2348        0xff, 0xff, 0x10, 0x00
2349    };
2350
2351    /* expected test results */
2352    static const int32_t results[]={
2353        /* number of bytes read, code point */
2354        4, 0x61,
2355        4, 0x3061,
2356        4, 0xfffd,
2357        4, 0xfffd,
2358        4, 0xfffd,
2359        4, 0xfffe,
2360        4, 0x10abcd,
2361        4, 0x10ffff
2362    };
2363
2364    /* error test input */
2365    static const uint8_t in2[]={
2366        0x61, 0x00, 0x00, 0x00,
2367        0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2368        0x62, 0x00, 0x00, 0x00,
2369        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2370        0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2371        0x62, 0x01, 0x00, 0x00,
2372        0x62, 0x02, 0x00, 0x00,
2373    };
2374
2375    /* expected error test results */
2376    static const int32_t results2[]={
2377        /* number of bytes read, code point */
2378        4,  0x61,
2379        8,  0x62,
2380        12, 0x162,
2381        4,  0x262,
2382    };
2383
2384    UConverterToUCallback cb;
2385    const void *p;
2386
2387    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2388    UErrorCode errorCode=U_ZERO_ERROR;
2389    UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2390    if(U_FAILURE(errorCode)) {
2391        log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2392        return;
2393    }
2394    TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2395
2396    /* Test the condition when source >= sourceLimit */
2397    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2398
2399    /* test error behavior with a skip callback */
2400    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2401    source=(const char *)in2;
2402    limit=(const char *)(in2+sizeof(in2));
2403    TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2404
2405    ucnv_close(cnv);
2406}
2407
2408static void
2409TestLATIN1() {
2410    /* test input */
2411    static const uint8_t in[]={
2412       0x61,
2413       0x31,
2414       0x32,
2415       0xc0,
2416       0xf0,
2417       0xf4,
2418    };
2419
2420    /* expected test results */
2421    static const int32_t results[]={
2422        /* number of bytes read, code point */
2423        1, 0x61,
2424        1, 0x31,
2425        1, 0x32,
2426        1, 0xc0,
2427        1, 0xf0,
2428        1, 0xf4,
2429    };
2430    static const uint16_t in1[] = {
2431        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2432        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2433        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2434        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2435        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2436        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2437        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2438        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2439        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2440        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2441        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2442        0xcb, 0x82
2443    };
2444    static const uint8_t out1[] = {
2445        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2446        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2447        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2448        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2449        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2450        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2451        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2452        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2453        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2454        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2455        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2456        0xcb, 0x82
2457    };
2458    static const uint16_t in2[]={
2459        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2460        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2461        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2462        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2463        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2464        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2465        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2466        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2467        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2468        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2469        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2470        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2471        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2472        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2473        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2474        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2475        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2476        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2477        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2478        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2479        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2480        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2481        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2482        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2483        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2484        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2485        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2486        0x37, 0x20, 0x2A, 0x2F,
2487    };
2488    static const unsigned char out2[]={
2489        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2490        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2491        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2492        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2493        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2494        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2495        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2496        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2497        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2498        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2499        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2500        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2501        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2502        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2503        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2504        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2505        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2506        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2507        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2508        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2509        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2510        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2511        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2512        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2513        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2514        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2515        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2516        0x37, 0x20, 0x2A, 0x2F,
2517    };
2518    const char *source=(const char *)in;
2519    const char *limit=(const char *)in+sizeof(in);
2520
2521    UErrorCode errorCode=U_ZERO_ERROR;
2522    UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2523    if(U_FAILURE(errorCode)) {
2524        log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2525        return;
2526    }
2527    TestNextUChar(cnv, source, limit, results, "LATIN_1");
2528    /* Test the condition when source >= sourceLimit */
2529    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2530    TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2531    TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2532
2533    ucnv_close(cnv);
2534}
2535
2536static void
2537TestSBCS() {
2538    /* test input */
2539    static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2540    /* expected test results */
2541    static const int32_t results[]={
2542        /* number of bytes read, code point */
2543        1, 0x61,
2544        1, 0xbf,
2545        1, 0xc4,
2546        1, 0x2021,
2547        1, 0xf8ff,
2548        1, 0x00d9
2549    };
2550
2551    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2552    UErrorCode errorCode=U_ZERO_ERROR;
2553    UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2554    if(U_FAILURE(errorCode)) {
2555        log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2556        return;
2557    }
2558    TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2559    /* Test the condition when source >= sourceLimit */
2560    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2561    /*Test for Illegal character */ /*
2562    {
2563    static const uint8_t input1[]={ 0xA1 };
2564    const char* illegalsource=(const char*)input1;
2565    TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2566    }
2567   */
2568    ucnv_close(cnv);
2569}
2570
2571static void
2572TestDBCS() {
2573    /* test input */
2574    static const uint8_t in[]={
2575        0x44, 0x6a,
2576        0xc4, 0x9c,
2577        0x7a, 0x74,
2578        0x46, 0xab,
2579        0x42, 0x5b,
2580
2581    };
2582
2583    /* expected test results */
2584    static const int32_t results[]={
2585        /* number of bytes read, code point */
2586        2, 0x00a7,
2587        2, 0xe1d2,
2588        2, 0x6962,
2589        2, 0xf842,
2590        2, 0xffe5,
2591    };
2592
2593    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2594    UErrorCode errorCode=U_ZERO_ERROR;
2595
2596    UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2597    if(U_FAILURE(errorCode)) {
2598        log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2599        return;
2600    }
2601    TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2602    /* Test the condition when source >= sourceLimit */
2603    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2604    /*Test for the condition where there is an invalid character*/
2605    {
2606        static const uint8_t source2[]={0x1a, 0x1b};
2607        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2608    }
2609    /*Test for the condition where we have a truncated char*/
2610    {
2611        static const uint8_t source1[]={0xc4};
2612        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2613        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2614    }
2615    ucnv_close(cnv);
2616}
2617
2618static void
2619TestMBCS() {
2620    /* test input */
2621    static const uint8_t in[]={
2622        0x01,
2623        0xa6, 0xa3,
2624        0x00,
2625        0xa6, 0xa1,
2626        0x08,
2627        0xc2, 0x76,
2628        0xc2, 0x78,
2629
2630    };
2631
2632    /* expected test results */
2633    static const int32_t results[]={
2634        /* number of bytes read, code point */
2635        1, 0x0001,
2636        2, 0x250c,
2637        1, 0x0000,
2638        2, 0x2500,
2639        1, 0x0008,
2640        2, 0xd60c,
2641        2, 0xd60e,
2642    };
2643
2644    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2645    UErrorCode errorCode=U_ZERO_ERROR;
2646
2647    UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2648    if(U_FAILURE(errorCode)) {
2649        log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2650        return;
2651    }
2652    TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2653    /* Test the condition when source >= sourceLimit */
2654    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2655    /*Test for the condition where there is an invalid character*/
2656    {
2657        static const uint8_t source2[]={0xa1, 0x80};
2658        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2659    }
2660    /*Test for the condition where we have a truncated char*/
2661    {
2662        static const uint8_t source1[]={0xc4};
2663        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2664        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2665    }
2666    ucnv_close(cnv);
2667
2668}
2669
2670#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2671static void
2672TestICCRunout() {
2673/*    { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2674
2675    const char *cnvName = "ibm-1363";
2676    UErrorCode status = U_ZERO_ERROR;
2677    const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2678    /* UChar   expectUData[] = { 0x00a1, 0x001a }; */
2679    const char *source = sourceData;
2680    const char *sourceLim = sourceData+sizeof(sourceData);
2681    UChar c1, c2, c3;
2682    UConverter *cnv=ucnv_open(cnvName, &status);
2683    if(U_FAILURE(status)) {
2684        log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2685	return;
2686    }
2687
2688#if 0
2689    {
2690    UChar   targetBuf[256];
2691    UChar   *target = targetBuf;
2692    UChar   *targetLim = target+256;
2693    ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2694
2695    log_info("After convert: target@%d, source@%d, status%s\n",
2696	     target-targetBuf, source-sourceData, u_errorName(status));
2697
2698    if(U_FAILURE(status)) {
2699	log_err("Failed to convert: %s\n", u_errorName(status));
2700    } else {
2701
2702    }
2703    }
2704#endif
2705
2706    c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2707    log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2708
2709    c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2710    log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2711
2712    c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2713    log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2714
2715    if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2716	log_verbose("OK\n");
2717    } else {
2718	log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2719    }
2720
2721    ucnv_close(cnv);
2722
2723}
2724#endif
2725
2726#ifdef U_ENABLE_GENERIC_ISO_2022
2727
2728static void
2729TestISO_2022() {
2730    /* test input */
2731    static const uint8_t in[]={
2732        0x1b, 0x25, 0x42,
2733        0x31,
2734        0x32,
2735        0x61,
2736        0xc2, 0x80,
2737        0xe0, 0xa0, 0x80,
2738        0xf0, 0x90, 0x80, 0x80
2739    };
2740
2741
2742
2743    /* expected test results */
2744    static const int32_t results[]={
2745        /* number of bytes read, code point */
2746        4, 0x0031,  /* 4 bytes including the escape sequence */
2747        1, 0x0032,
2748        1, 0x61,
2749        2, 0x80,
2750        3, 0x800,
2751        4, 0x10000
2752    };
2753
2754    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2755    UErrorCode errorCode=U_ZERO_ERROR;
2756    UConverter *cnv;
2757
2758    cnv=ucnv_open("ISO_2022", &errorCode);
2759    if(U_FAILURE(errorCode)) {
2760        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2761        return;
2762    }
2763    TestNextUChar(cnv, source, limit, results, "ISO_2022");
2764
2765    /* Test the condition when source >= sourceLimit */
2766    TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2767    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2768    /*Test for the condition where we have a truncated char*/
2769    {
2770        static const uint8_t source1[]={0xc4};
2771        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2772        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2773    }
2774    /*Test for the condition where there is an invalid character*/
2775    {
2776        static const uint8_t source2[]={0xa1, 0x01};
2777        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2778    }
2779    ucnv_close(cnv);
2780}
2781
2782#endif
2783
2784static void
2785TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2786    const UChar* uSource;
2787    const UChar* uSourceLimit;
2788    const char* cSource;
2789    const char* cSourceLimit;
2790    UChar *uTargetLimit =NULL;
2791    UChar *uTarget;
2792    char *cTarget;
2793    const char *cTargetLimit;
2794    char *cBuf;
2795    UChar *uBuf; /*,*test;*/
2796    int32_t uBufSize = 120;
2797    int len=0;
2798    int i=2;
2799    UErrorCode errorCode=U_ZERO_ERROR;
2800    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2801    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2802    ucnv_reset(cnv);
2803    for(;--i>0; ){
2804        uSource = (UChar*) source;
2805        uSourceLimit=(const UChar*)sourceLimit;
2806        cTarget = cBuf;
2807        uTarget = uBuf;
2808        cSource = cBuf;
2809        cTargetLimit = cBuf;
2810        uTargetLimit = uBuf;
2811
2812        do{
2813
2814            cTargetLimit = cTargetLimit+ i;
2815            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2816            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2817               errorCode=U_ZERO_ERROR;
2818                continue;
2819            }
2820
2821            if(U_FAILURE(errorCode)){
2822                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2823                return;
2824            }
2825
2826        }while (uSource<uSourceLimit);
2827
2828        cSourceLimit =cTarget;
2829        do{
2830            uTargetLimit=uTargetLimit+i;
2831            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2832            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2833               errorCode=U_ZERO_ERROR;
2834                continue;
2835            }
2836            if(U_FAILURE(errorCode)){
2837                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2838                    return;
2839            }
2840        }while(cSource<cSourceLimit);
2841
2842        uSource = source;
2843        /*test =uBuf;*/
2844        for(len=0;len<(int)(source - sourceLimit);len++){
2845            if(uBuf[len]!=uSource[len]){
2846                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2847            }
2848        }
2849    }
2850    free(uBuf);
2851    free(cBuf);
2852}
2853/* Test for Jitterbug 778 */
2854static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2855    const UChar* uSource;
2856    const UChar* uSourceLimit;
2857    const char* cSource;
2858    UChar *uTargetLimit =NULL;
2859    UChar *uTarget;
2860    char *cTarget;
2861    const char *cTargetLimit;
2862    char *cBuf;
2863    UChar *uBuf,*test;
2864    int32_t uBufSize = 120;
2865    int numCharsInTarget=0;
2866    UErrorCode errorCode=U_ZERO_ERROR;
2867    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2868    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2869    uSource = source;
2870    uSourceLimit=sourceLimit;
2871    cTarget = cBuf;
2872    cTargetLimit = cBuf +uBufSize*5;
2873    uTarget = uBuf;
2874    uTargetLimit = uBuf+ uBufSize*5;
2875    ucnv_reset(cnv);
2876    numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2877    if(U_FAILURE(errorCode)){
2878        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2879        return;
2880    }
2881    cSource = cBuf;
2882    test =uBuf;
2883    ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2884    if(U_FAILURE(errorCode)){
2885        log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2886        return;
2887    }
2888    uSource = source;
2889    while(uSource<uSourceLimit){
2890        if(*test!=*uSource){
2891
2892            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2893        }
2894        uSource++;
2895        test++;
2896    }
2897    free(uBuf);
2898    free(cBuf);
2899}
2900
2901static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2902    const UChar* uSource;
2903    const UChar* uSourceLimit;
2904    const char* cSource;
2905    const char* cSourceLimit;
2906    UChar *uTargetLimit =NULL;
2907    UChar *uTarget;
2908    char *cTarget;
2909    const char *cTargetLimit;
2910    char *cBuf;
2911    UChar *uBuf; /*,*test;*/
2912    int32_t uBufSize = 120;
2913    int len=0;
2914    int i=2;
2915    const UChar *temp = sourceLimit;
2916    UErrorCode errorCode=U_ZERO_ERROR;
2917    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2918    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2919
2920    ucnv_reset(cnv);
2921    for(;--i>0;){
2922        uSource = (UChar*) source;
2923        cTarget = cBuf;
2924        uTarget = uBuf;
2925        cSource = cBuf;
2926        cTargetLimit = cBuf;
2927        uTargetLimit = uBuf+uBufSize*5;
2928        cTargetLimit = cTargetLimit+uBufSize*10;
2929        uSourceLimit=uSource;
2930        do{
2931
2932            if (uSourceLimit < sourceLimit) {
2933                uSourceLimit = uSourceLimit+1;
2934            }
2935            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2936            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2937               errorCode=U_ZERO_ERROR;
2938                continue;
2939            }
2940
2941            if(U_FAILURE(errorCode)){
2942                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2943                return;
2944            }
2945
2946        }while (uSource<temp);
2947
2948        cSourceLimit =cBuf;
2949        do{
2950            if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2951                cSourceLimit = cSourceLimit+1;
2952            }
2953            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2954            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2955               errorCode=U_ZERO_ERROR;
2956                continue;
2957            }
2958            if(U_FAILURE(errorCode)){
2959                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2960                    return;
2961            }
2962        }while(cSource<cTarget);
2963
2964        uSource = source;
2965        /*test =uBuf;*/
2966        for(;len<(int)(source - sourceLimit);len++){
2967            if(uBuf[len]!=uSource[len]){
2968                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2969            }
2970        }
2971    }
2972    free(uBuf);
2973    free(cBuf);
2974}
2975static void
2976TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2977                     const uint16_t results[], const char* message){
2978/*     const char* s0; */
2979     const char* s=(char*)source;
2980     const uint16_t *r=results;
2981     UErrorCode errorCode=U_ZERO_ERROR;
2982     uint32_t c,exC;
2983     ucnv_reset(cnv);
2984     while(s<limit) {
2985	 /* s0=s; */
2986        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2987        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2988            break; /* no more significant input */
2989        } else if(U_FAILURE(errorCode)) {
2990            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2991            break;
2992        } else {
2993            if(U16_IS_LEAD(*r)){
2994                int i =0, len = 2;
2995                U16_NEXT(r, i, len, exC);
2996                r++;
2997            }else{
2998                exC = *r;
2999            }
3000            if(c!=(uint32_t)(exC))
3001                log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
3002        }
3003        r++;
3004    }
3005}
3006
3007static int TestJitterbug930(const char* enc){
3008    UErrorCode err = U_ZERO_ERROR;
3009    UConverter*converter;
3010    char out[80];
3011    char*target = out;
3012    UChar in[4];
3013    const UChar*source = in;
3014    int32_t off[80];
3015    int32_t* offsets = off;
3016    int numOffWritten=0;
3017    UBool flush = 0;
3018    converter = my_ucnv_open(enc, &err);
3019
3020    in[0] = 0x41;     /* 0x4E00;*/
3021    in[1] = 0x4E01;
3022    in[2] = 0x4E02;
3023    in[3] = 0x4E03;
3024
3025    memset(off, '*', sizeof(off));
3026
3027    ucnv_fromUnicode (converter,
3028            &target,
3029            target+2,
3030            &source,
3031            source+3,
3032            offsets,
3033            flush,
3034            &err);
3035
3036        /* writes three bytes into the output buffer: 41 1B 24
3037        * but offsets contains 0 1 1
3038    */
3039    while(*offsets< off[10]){
3040        numOffWritten++;
3041        offsets++;
3042    }
3043    log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3044    if(numOffWritten!= (int)(target-out)){
3045        log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3046    }
3047
3048    err = U_ZERO_ERROR;
3049
3050    memset(off,'*' , sizeof(off));
3051
3052    flush = 1;
3053    offsets=off;
3054    ucnv_fromUnicode (converter,
3055            &target,
3056            target+4,
3057            &source,
3058            source,
3059            offsets,
3060            flush,
3061            &err);
3062    numOffWritten=0;
3063    while(*offsets< off[10]){
3064        numOffWritten++;
3065        if(*offsets!= -1){
3066            log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3067        }
3068        offsets++;
3069    }
3070
3071    /* writes 42 43 7A into output buffer,
3072     * offsets contains -1 -1 -1
3073     */
3074    ucnv_close(converter);
3075    return 0;
3076}
3077
3078static void
3079TestHZ() {
3080    /* test input */
3081    static const uint16_t in[]={
3082            0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3083            0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3084            0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3085            0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3086            0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3087            0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3088            0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3089            0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3090            0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3091            0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3092            0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3093            0x005A, 0x005B, 0x005C, 0x000A
3094      };
3095    const UChar* uSource;
3096    const UChar* uSourceLimit;
3097    const char* cSource;
3098    const char* cSourceLimit;
3099    UChar *uTargetLimit =NULL;
3100    UChar *uTarget;
3101    char *cTarget;
3102    const char *cTargetLimit;
3103    char *cBuf;
3104    UChar *uBuf,*test;
3105    int32_t uBufSize = 120;
3106    UErrorCode errorCode=U_ZERO_ERROR;
3107    UConverter *cnv;
3108    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3109    int32_t* myOff= offsets;
3110    cnv=ucnv_open("HZ", &errorCode);
3111    if(U_FAILURE(errorCode)) {
3112        log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3113        return;
3114    }
3115
3116    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3117    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3118    uSource = (const UChar*)in;
3119    uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3120    cTarget = cBuf;
3121    cTargetLimit = cBuf +uBufSize*5;
3122    uTarget = uBuf;
3123    uTargetLimit = uBuf+ uBufSize*5;
3124    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3125    if(U_FAILURE(errorCode)){
3126        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3127        return;
3128    }
3129    cSource = cBuf;
3130    cSourceLimit =cTarget;
3131    test =uBuf;
3132    myOff=offsets;
3133    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3134    if(U_FAILURE(errorCode)){
3135        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3136        return;
3137    }
3138    uSource = (const UChar*)in;
3139    while(uSource<uSourceLimit){
3140        if(*test!=*uSource){
3141
3142            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3143        }
3144        uSource++;
3145        test++;
3146    }
3147    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3148    TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3149    TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3150    TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3151    TestJitterbug930("csISO2022JP");
3152    ucnv_close(cnv);
3153    free(offsets);
3154    free(uBuf);
3155    free(cBuf);
3156}
3157
3158static void
3159TestISCII(){
3160        /* test input */
3161    static const uint16_t in[]={
3162        /* test full range of Devanagari */
3163        0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3164        0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3165        0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3166        0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3167        0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3168        0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3169        0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3170        0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3171        0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3172        0x096D,0x096E,0x096F,
3173        /* test Soft halant*/
3174        0x0915,0x094d, 0x200D,
3175        /* test explicit halant */
3176        0x0915,0x094d, 0x200c,
3177        /* test double danda */
3178        0x965,
3179        /* test ASCII */
3180        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3181        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3182        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3183        /* tests from Lotus */
3184        0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3185        0x0930,0x094D,0x200D,
3186        0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3187        0x0915,0x0921,0x002B,0x095F,
3188        /* tamil range */
3189        0x0B86, 0xB87, 0xB88,
3190        /* telugu range */
3191        0x0C05, 0x0C02, 0x0C03,0x0c31,
3192        /* kannada range */
3193        0x0C85, 0xC82, 0x0C83,
3194        /* test Abbr sign and Anudatta */
3195        0x0970, 0x952,
3196       /* 0x0958,
3197        0x0959,
3198        0x095A,
3199        0x095B,
3200        0x095C,
3201        0x095D,
3202        0x095E,
3203        0x095F,*/
3204        0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3205        0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3206        0x090C ,
3207        0x0962,
3208        0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3209        0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3210        0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3211        0x093D /* Avagraha  0xEA, 0xE9*/,
3212        0x0958,
3213        0x0959,
3214        0x095A,
3215        0x095B,
3216        0x095C,
3217        0x095D,
3218        0x095E,
3219        0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3220      };
3221    static const unsigned char byteArr[]={
3222
3223        0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3224        0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3225        0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3226        0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3227        0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3228        0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3229        0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3230        0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3231        0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3232        0xf8,0xf9,0xfa,
3233        /* test soft halant */
3234        0xb3, 0xE8, 0xE9,
3235        /* test explicit halant */
3236        0xb3, 0xE8, 0xE8,
3237        /* test double danda */
3238        0xea, 0xea,
3239        /* test ASCII */
3240        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3241        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3242        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3243        /* test ATR code */
3244
3245        /* tests from Lotus */
3246        0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3247        0xEF,0x42,0xCF,0xE8,0xD9,
3248        0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3249        0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3250        /* tamil range */
3251        0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3252        /* telugu range */
3253        0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3254        /* kannada range */
3255        0xEF, 0x48,0xa4, 0xa2, 0xa3,
3256        /* anudatta and abbreviation sign */
3257        0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3258
3259
3260        0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3261
3262        0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3263
3264        0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3265
3266        0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3267
3268        0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3269
3270        0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3271
3272        0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3273
3274        0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3275
3276        0xB3, 0xE9, /* Ka + NUKTA */
3277
3278        0xB4, 0xE9, /* Kha + NUKTA */
3279
3280        0xB5, 0xE9, /* Ga + NUKTA */
3281
3282        0xBA, 0xE9,
3283
3284        0xBF, 0xE9,
3285
3286        0xC0, 0xE9,
3287
3288        0xC9, 0xE9,
3289        /* INV halant RA    */
3290        0xD9, 0xE8, 0xCF,
3291        0x00, 0x00A0,
3292        /* just consume unhandled codepoints */
3293        0xEF, 0x30,
3294
3295    };
3296    testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,TRUE);
3297    TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3298
3299}
3300
3301static void
3302TestISO_2022_JP() {
3303    /* test input */
3304    static const uint16_t in[]={
3305        0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3306        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3307        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3308        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3309        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3310        0x201D, 0x3014, 0x000D, 0x000A,
3311        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3312        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3313        };
3314    const UChar* uSource;
3315    const UChar* uSourceLimit;
3316    const char* cSource;
3317    const char* cSourceLimit;
3318    UChar *uTargetLimit =NULL;
3319    UChar *uTarget;
3320    char *cTarget;
3321    const char *cTargetLimit;
3322    char *cBuf;
3323    UChar *uBuf,*test;
3324    int32_t uBufSize = 120;
3325    UErrorCode errorCode=U_ZERO_ERROR;
3326    UConverter *cnv;
3327    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3328    int32_t* myOff= offsets;
3329    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3330    if(U_FAILURE(errorCode)) {
3331        log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3332        return;
3333    }
3334
3335    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3336    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3337    uSource = (const UChar*)in;
3338    uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3339    cTarget = cBuf;
3340    cTargetLimit = cBuf +uBufSize*5;
3341    uTarget = uBuf;
3342    uTargetLimit = uBuf+ uBufSize*5;
3343    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3344    if(U_FAILURE(errorCode)){
3345        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3346        return;
3347    }
3348    cSource = cBuf;
3349    cSourceLimit =cTarget;
3350    test =uBuf;
3351    myOff=offsets;
3352    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3353    if(U_FAILURE(errorCode)){
3354        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3355        return;
3356    }
3357
3358    uSource = (const UChar*)in;
3359    while(uSource<uSourceLimit){
3360        if(*test!=*uSource){
3361
3362            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3363        }
3364        uSource++;
3365        test++;
3366    }
3367
3368    TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3369    TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3370    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3371    TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3372    TestJitterbug930("csISO2022JP");
3373    ucnv_close(cnv);
3374    free(uBuf);
3375    free(cBuf);
3376    free(offsets);
3377}
3378
3379static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3380    const UChar* uSource;
3381    const UChar* uSourceLimit;
3382    const char* cSource;
3383    const char* cSourceLimit;
3384    UChar *uTargetLimit =NULL;
3385    UChar *uTarget;
3386    char *cTarget;
3387    const char *cTargetLimit;
3388    char *cBuf;
3389    UChar *uBuf,*test;
3390    int32_t uBufSize = 120*10;
3391    UErrorCode errorCode=U_ZERO_ERROR;
3392    UConverter *cnv;
3393    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3394    int32_t* myOff= offsets;
3395    cnv=my_ucnv_open(conv, &errorCode);
3396    if(U_FAILURE(errorCode)) {
3397        log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3398        return;
3399    }
3400
3401    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3402    cBuf =(char*)malloc(uBufSize * sizeof(char));
3403    uSource = (const UChar*)in;
3404    uSourceLimit=uSource+len;
3405    cTarget = cBuf;
3406    cTargetLimit = cBuf +uBufSize;
3407    uTarget = uBuf;
3408    uTargetLimit = uBuf+ uBufSize;
3409    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3410    if(U_FAILURE(errorCode)){
3411        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3412        return;
3413    }
3414    /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3415    cSource = cBuf;
3416    cSourceLimit =cTarget;
3417    test =uBuf;
3418    myOff=offsets;
3419    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3420    if(U_FAILURE(errorCode)){
3421        log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3422        return;
3423    }
3424
3425    uSource = (const UChar*)in;
3426    while(uSource<uSourceLimit){
3427        if(*test!=*uSource){
3428            log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3429        }
3430        uSource++;
3431        test++;
3432    }
3433    TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3434    TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3435    TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3436    if(byteArr && byteArrLen!=0){
3437        TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3438        TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3439        {
3440            cSource = byteArr;
3441            cSourceLimit = cSource+byteArrLen;
3442            test=uBuf;
3443            myOff = offsets;
3444            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3445            if(U_FAILURE(errorCode)){
3446                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3447                return;
3448            }
3449
3450            uSource = (const UChar*)in;
3451            while(uSource<uSourceLimit){
3452                if(*test!=*uSource){
3453                    log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3454                }
3455                uSource++;
3456                test++;
3457            }
3458        }
3459    }
3460
3461    ucnv_close(cnv);
3462    free(uBuf);
3463    free(cBuf);
3464    free(offsets);
3465}
3466static UChar U_CALLCONV
3467_charAt(int32_t offset, void *context) {
3468    return ((char*)context)[offset];
3469}
3470
3471static int32_t
3472unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3473    int32_t srcIndex=0;
3474    int32_t dstIndex=0;
3475    if(U_FAILURE(*status)){
3476        return 0;
3477    }
3478    if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3479        *status = U_ILLEGAL_ARGUMENT_ERROR;
3480        return 0;
3481    }
3482    if(srcLen==-1){
3483        srcLen = (int32_t)uprv_strlen(src);
3484    }
3485
3486    for (; srcIndex<srcLen; ) {
3487        UChar32 c = src[srcIndex++];
3488        if (c == 0x005C /*'\\'*/) {
3489            c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3490            if (c == (UChar32)0xFFFFFFFF) {
3491                *status=U_INVALID_CHAR_FOUND; /* return empty string */
3492                break; /* invalid escape sequence */
3493            }
3494        }
3495        if(dstIndex < dstLen){
3496            if(c>0xFFFF){
3497               dst[dstIndex++] = U16_LEAD(c);
3498               if(dstIndex<dstLen){
3499                    dst[dstIndex]=U16_TRAIL(c);
3500               }else{
3501                   *status=U_BUFFER_OVERFLOW_ERROR;
3502               }
3503            }else{
3504                dst[dstIndex]=(UChar)c;
3505            }
3506
3507        }else{
3508            *status = U_BUFFER_OVERFLOW_ERROR;
3509        }
3510        dstIndex++; /* for preflighting */
3511    }
3512    return dstIndex;
3513}
3514
3515static void
3516TestFullRoundtrip(const char* cp){
3517    UChar usource[10] ={0};
3518    UChar nsrc[10] = {0};
3519    uint32_t i=1;
3520    int len=0, ulen;
3521    nsrc[0]=0x0061;
3522    /* Test codepoint 0 */
3523    TestConv(usource,1,cp,"",NULL,0);
3524    TestConv(usource,2,cp,"",NULL,0);
3525    nsrc[2]=0x5555;
3526    TestConv(nsrc,3,cp,"",NULL,0);
3527
3528    for(;i<=0x10FFFF;i++){
3529        if(i==0xD800){
3530            i=0xDFFF;
3531            continue;
3532        }
3533        if(i<=0xFFFF){
3534            usource[0] =(UChar) i;
3535            len=1;
3536        }else{
3537            usource[0]=U16_LEAD(i);
3538            usource[1]=U16_TRAIL(i);
3539            len=2;
3540        }
3541        ulen=len;
3542        if(i==0x80) {
3543            usource[2]=0;
3544        }
3545        /* Test only single code points */
3546        TestConv(usource,ulen,cp,"",NULL,0);
3547        /* Test codepoint repeated twice */
3548        usource[ulen]=usource[0];
3549        usource[ulen+1]=usource[1];
3550        ulen+=len;
3551        TestConv(usource,ulen,cp,"",NULL,0);
3552        /* Test codepoint repeated 3 times */
3553        usource[ulen]=usource[0];
3554        usource[ulen+1]=usource[1];
3555        ulen+=len;
3556        TestConv(usource,ulen,cp,"",NULL,0);
3557        /* Test codepoint in between 2 codepoints */
3558        nsrc[1]=usource[0];
3559        nsrc[2]=usource[1];
3560        nsrc[len+1]=0x5555;
3561        TestConv(nsrc,len+2,cp,"",NULL,0);
3562        uprv_memset(usource,0,sizeof(UChar)*10);
3563    }
3564}
3565
3566static void
3567TestRoundTrippingAllUTF(void){
3568    if(!getTestOption(QUICK_OPTION)){
3569        log_verbose("Running exhaustive round trip test for BOCU-1\n");
3570        TestFullRoundtrip("BOCU-1");
3571        log_verbose("Running exhaustive round trip test for SCSU\n");
3572        TestFullRoundtrip("SCSU");
3573        log_verbose("Running exhaustive round trip test for UTF-8\n");
3574        TestFullRoundtrip("UTF-8");
3575        log_verbose("Running exhaustive round trip test for CESU-8\n");
3576        TestFullRoundtrip("CESU-8");
3577        log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3578        TestFullRoundtrip("UTF-16BE");
3579        log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3580        TestFullRoundtrip("UTF-16LE");
3581        log_verbose("Running exhaustive round trip test for UTF-16\n");
3582        TestFullRoundtrip("UTF-16");
3583        log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3584        TestFullRoundtrip("UTF-32BE");
3585        log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3586        TestFullRoundtrip("UTF-32LE");
3587        log_verbose("Running exhaustive round trip test for UTF-32\n");
3588        TestFullRoundtrip("UTF-32");
3589        log_verbose("Running exhaustive round trip test for UTF-7\n");
3590        TestFullRoundtrip("UTF-7");
3591        log_verbose("Running exhaustive round trip test for UTF-7\n");
3592        TestFullRoundtrip("UTF-7,version=1");
3593        log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3594        TestFullRoundtrip("IMAP-mailbox-name");
3595        /*
3596         *
3597         * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3598         * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3599         * The old mappings remain as fallbacks.
3600         * This test may be reintroduced at a later time.
3601         *
3602         * 110118 - mow
3603         */
3604         /*
3605         log_verbose("Running exhaustive round trip test for GB18030\n");
3606         TestFullRoundtrip("GB18030");
3607         */
3608    }
3609}
3610
3611static void
3612TestSCSU() {
3613
3614    static const uint16_t germanUTF16[]={
3615        0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3616    };
3617
3618    static const uint8_t germanSCSU[]={
3619        0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3620    };
3621
3622    static const uint16_t russianUTF16[]={
3623        0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3624    };
3625
3626    static const uint8_t russianSCSU[]={
3627        0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3628    };
3629
3630    static const uint16_t japaneseUTF16[]={
3631        0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3632        0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3633        0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3634        0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3635        0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3636        0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3637        0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3638        0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3639        0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3640        0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3641        0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3642        0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3643        0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3644        0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3645        0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3646    };
3647
3648    /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3649     it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3650    static const uint8_t japaneseSCSU[]={
3651        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3652        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3653        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3654        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3655        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3656        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3657        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3658        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3659        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3660        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3661        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3662        0xcb, 0x82
3663    };
3664
3665    static const uint16_t allFeaturesUTF16[]={
3666        0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3667        0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3668        0x01df, 0xf000, 0xdbff, 0xdfff
3669    };
3670
3671    /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3672     * result here (34B vs. 35B)
3673     */
3674    static const uint8_t allFeaturesSCSU[]={
3675        0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3676        0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3677        0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3678        0xdf, 0x14, 0x80, 0x15, 0xff
3679    };
3680    static const uint16_t monkeyIn[]={
3681        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3682        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3683        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3684        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3685        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3686        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3687        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3688        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3689        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3690        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3691        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3692        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3693        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3694        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3695        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3696        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3697        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3698        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3699        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3700        /* test non-BMP code points */
3701        0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3702        0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3703        0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3704        0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3705        0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3706        0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3707        0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3708        0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3709        0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3710        0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3711        0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3712
3713
3714        0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3715        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3716        0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3717        0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3718        0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3719    };
3720    static const char *fTestCases [] = {
3721          "\\ud800\\udc00", /* smallest surrogate*/
3722          "\\ud8ff\\udcff",
3723          "\\udBff\\udFff", /* largest surrogate pair*/
3724          "\\ud834\\udc00",
3725          "\\U0010FFFF",
3726          "Hello \\u9292 \\u9192 World!",
3727          "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3728          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3729
3730          "\\u0648\\u06c8", /* catch missing reset*/
3731          "\\u0648\\u06c8",
3732
3733          "\\u4444\\uE001", /* lowest quotable*/
3734          "\\u4444\\uf2FF", /* highest quotable*/
3735          "\\u4444\\uf188\\u4444",
3736          "\\u4444\\uf188\\uf288",
3737          "\\u4444\\uf188abc\\u0429\\uf288",
3738          "\\u9292\\u2222",
3739          "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3740          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3741          "Hello World!123456",
3742          "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3743
3744          "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3745          "abc\\u4411d",      /* uses SQU*/
3746          "abc\\u4411\\u4412d",/* uses SCU*/
3747          "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3748          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3749          "\\u9292\\u2222",
3750          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3751          "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3752          "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3753
3754          "", /* empty input*/
3755          "\\u0000", /* smallest BMP character*/
3756          "\\uFFFF", /* largest BMP character*/
3757
3758          /* regression tests*/
3759          "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3760          "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3761          "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3762          "\\u0041\\u00df\\u0401\\u015f",
3763          "\\u9066\\u2123abc",
3764          "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3765          "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3766    };
3767    int i=0;
3768    for(;i<UPRV_LENGTHOF(fTestCases);i++){
3769        const char* cSrc = fTestCases[i];
3770        UErrorCode status = U_ZERO_ERROR;
3771        int32_t cSrcLen,srcLen;
3772        UChar* src;
3773        /* UConverter* cnv = ucnv_open("SCSU",&status); */
3774        cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3775        src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3776        srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3777        log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3778        TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3779        free(src);
3780    }
3781    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3782    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3783    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3784    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3785    TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3786    TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3787    TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3788}
3789
3790#if !UCONFIG_NO_LEGACY_CONVERSION
3791static void TestJitterbug2346(){
3792    char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3793                      0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3794    uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3795
3796    UChar uTarget[500]={'\0'};
3797    UChar* utarget=uTarget;
3798    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3799
3800    char cTarget[500]={'\0'};
3801    char* ctarget=cTarget;
3802    char* ctargetLimit=cTarget+sizeof(cTarget);
3803    const char* csource=source;
3804    UChar* temp = expected;
3805    UErrorCode err=U_ZERO_ERROR;
3806
3807    UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3808    if(U_FAILURE(err)) {
3809        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3810        return;
3811    }
3812    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3813    if(U_FAILURE(err)) {
3814        log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3815        return;
3816    }
3817    utargetLimit=utarget;
3818    utarget = uTarget;
3819    while(utarget<utargetLimit){
3820        if(*temp!=*utarget){
3821
3822            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3823        }
3824        utarget++;
3825        temp++;
3826    }
3827    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3828    if(U_FAILURE(err)) {
3829        log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3830        return;
3831    }
3832    ctargetLimit=ctarget;
3833    ctarget =cTarget;
3834    ucnv_close(conv);
3835
3836
3837}
3838
3839static void
3840TestISO_2022_JP_1() {
3841    /* test input */
3842    static const uint16_t in[]={
3843        0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3844        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3845        0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3846        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3847        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3848        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3849        0x201D, 0x000D, 0x000A,
3850        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3851        0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3852        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3853        0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3854        0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3855        0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3856      };
3857    const UChar* uSource;
3858    const UChar* uSourceLimit;
3859    const char* cSource;
3860    const char* cSourceLimit;
3861    UChar *uTargetLimit =NULL;
3862    UChar *uTarget;
3863    char *cTarget;
3864    const char *cTargetLimit;
3865    char *cBuf;
3866    UChar *uBuf,*test;
3867    int32_t uBufSize = 120;
3868    UErrorCode errorCode=U_ZERO_ERROR;
3869    UConverter *cnv;
3870
3871    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3872    if(U_FAILURE(errorCode)) {
3873        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3874        return;
3875    }
3876
3877    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3878    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3879    uSource = (const UChar*)in;
3880    uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3881    cTarget = cBuf;
3882    cTargetLimit = cBuf +uBufSize*5;
3883    uTarget = uBuf;
3884    uTargetLimit = uBuf+ uBufSize*5;
3885    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3886    if(U_FAILURE(errorCode)){
3887        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3888        return;
3889    }
3890    cSource = cBuf;
3891    cSourceLimit =cTarget;
3892    test =uBuf;
3893    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3894    if(U_FAILURE(errorCode)){
3895        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3896        return;
3897    }
3898    uSource = (const UChar*)in;
3899    while(uSource<uSourceLimit){
3900        if(*test!=*uSource){
3901
3902            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3903        }
3904        uSource++;
3905        test++;
3906    }
3907    /*ucnv_close(cnv);
3908    cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3909    /*Test for the condition where there is an invalid character*/
3910    ucnv_reset(cnv);
3911    {
3912        static const uint8_t source2[]={0x0e,0x24,0x053};
3913        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3914    }
3915    TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3916    TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3917    ucnv_close(cnv);
3918    free(uBuf);
3919    free(cBuf);
3920}
3921
3922static void
3923TestISO_2022_JP_2() {
3924    /* test input */
3925    static const uint16_t in[]={
3926        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3927        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3928        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3929        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3930        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3931        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3932        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3933        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3934        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3935        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3936        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3937        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3938        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3939        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3940        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3941        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3942        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3943        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3944        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3945      };
3946    const UChar* uSource;
3947    const UChar* uSourceLimit;
3948    const char* cSource;
3949    const char* cSourceLimit;
3950    UChar *uTargetLimit =NULL;
3951    UChar *uTarget;
3952    char *cTarget;
3953    const char *cTargetLimit;
3954    char *cBuf;
3955    UChar *uBuf,*test;
3956    int32_t uBufSize = 120;
3957    UErrorCode errorCode=U_ZERO_ERROR;
3958    UConverter *cnv;
3959    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3960    int32_t* myOff= offsets;
3961    cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3962    if(U_FAILURE(errorCode)) {
3963        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3964        return;
3965    }
3966
3967    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3968    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3969    uSource = (const UChar*)in;
3970    uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3971    cTarget = cBuf;
3972    cTargetLimit = cBuf +uBufSize*5;
3973    uTarget = uBuf;
3974    uTargetLimit = uBuf+ uBufSize*5;
3975    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3976    if(U_FAILURE(errorCode)){
3977        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3978        return;
3979    }
3980    cSource = cBuf;
3981    cSourceLimit =cTarget;
3982    test =uBuf;
3983    myOff=offsets;
3984    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3985    if(U_FAILURE(errorCode)){
3986        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3987        return;
3988    }
3989    uSource = (const UChar*)in;
3990    while(uSource<uSourceLimit){
3991        if(*test!=*uSource){
3992
3993            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3994        }
3995        uSource++;
3996        test++;
3997    }
3998    TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3999    TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4000    TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4001    /*Test for the condition where there is an invalid character*/
4002    ucnv_reset(cnv);
4003    {
4004        static const uint8_t source2[]={0x0e,0x24,0x053};
4005        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
4006    }
4007    ucnv_close(cnv);
4008    free(uBuf);
4009    free(cBuf);
4010    free(offsets);
4011}
4012
4013static void
4014TestISO_2022_KR() {
4015    /* test input */
4016    static const uint16_t in[]={
4017                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4018                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4019                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4020                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4021                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4022                   ,0x53E3,0x53E4,0x000A,0x000D};
4023    const UChar* uSource;
4024    const UChar* uSourceLimit;
4025    const char* cSource;
4026    const char* cSourceLimit;
4027    UChar *uTargetLimit =NULL;
4028    UChar *uTarget;
4029    char *cTarget;
4030    const char *cTargetLimit;
4031    char *cBuf;
4032    UChar *uBuf,*test;
4033    int32_t uBufSize = 120;
4034    UErrorCode errorCode=U_ZERO_ERROR;
4035    UConverter *cnv;
4036    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4037    int32_t* myOff= offsets;
4038    cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4039    if(U_FAILURE(errorCode)) {
4040        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4041        return;
4042    }
4043
4044    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4045    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4046    uSource = (const UChar*)in;
4047    uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4048    cTarget = cBuf;
4049    cTargetLimit = cBuf +uBufSize*5;
4050    uTarget = uBuf;
4051    uTargetLimit = uBuf+ uBufSize*5;
4052    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4053    if(U_FAILURE(errorCode)){
4054        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4055        return;
4056    }
4057    cSource = cBuf;
4058    cSourceLimit =cTarget;
4059    test =uBuf;
4060    myOff=offsets;
4061    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4062    if(U_FAILURE(errorCode)){
4063        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4064        return;
4065    }
4066    uSource = (const UChar*)in;
4067    while(uSource<uSourceLimit){
4068        if(*test!=*uSource){
4069            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4070        }
4071        uSource++;
4072        test++;
4073    }
4074    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4075    TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4076    TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4077    TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4078    TestJitterbug930("csISO2022KR");
4079    /*Test for the condition where there is an invalid character*/
4080    ucnv_reset(cnv);
4081    {
4082        static const uint8_t source2[]={0x1b,0x24,0x053};
4083        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4084        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4085    }
4086    ucnv_close(cnv);
4087    free(uBuf);
4088    free(cBuf);
4089    free(offsets);
4090}
4091
4092static void
4093TestISO_2022_KR_1() {
4094    /* test input */
4095    static const uint16_t in[]={
4096                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4097                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4098                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4099                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4100                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4101                   ,0x53E3,0x53E4,0x000A,0x000D};
4102    const UChar* uSource;
4103    const UChar* uSourceLimit;
4104    const char* cSource;
4105    const char* cSourceLimit;
4106    UChar *uTargetLimit =NULL;
4107    UChar *uTarget;
4108    char *cTarget;
4109    const char *cTargetLimit;
4110    char *cBuf;
4111    UChar *uBuf,*test;
4112    int32_t uBufSize = 120;
4113    UErrorCode errorCode=U_ZERO_ERROR;
4114    UConverter *cnv;
4115    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4116    int32_t* myOff= offsets;
4117    cnv=ucnv_open("ibm-25546", &errorCode);
4118    if(U_FAILURE(errorCode)) {
4119        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4120        return;
4121    }
4122
4123    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4124    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4125    uSource = (const UChar*)in;
4126    uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4127    cTarget = cBuf;
4128    cTargetLimit = cBuf +uBufSize*5;
4129    uTarget = uBuf;
4130    uTargetLimit = uBuf+ uBufSize*5;
4131    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4132    if(U_FAILURE(errorCode)){
4133        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4134        return;
4135    }
4136    cSource = cBuf;
4137    cSourceLimit =cTarget;
4138    test =uBuf;
4139    myOff=offsets;
4140    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4141    if(U_FAILURE(errorCode)){
4142        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4143        return;
4144    }
4145    uSource = (const UChar*)in;
4146    while(uSource<uSourceLimit){
4147        if(*test!=*uSource){
4148            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4149        }
4150        uSource++;
4151        test++;
4152    }
4153    ucnv_reset(cnv);
4154    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4155    TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4156    TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4157    ucnv_reset(cnv);
4158    TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4159        /*Test for the condition where there is an invalid character*/
4160    ucnv_reset(cnv);
4161    {
4162        static const uint8_t source2[]={0x1b,0x24,0x053};
4163        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4164        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4165    }
4166    ucnv_close(cnv);
4167    free(uBuf);
4168    free(cBuf);
4169    free(offsets);
4170}
4171
4172static void TestJitterbug2411(){
4173    static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4174                         "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4175    UConverter* kr=NULL, *kr1=NULL;
4176    UErrorCode errorCode = U_ZERO_ERROR;
4177    UChar tgt[100]={'\0'};
4178    UChar* target = tgt;
4179    UChar* targetLimit = target+100;
4180    kr=ucnv_open("iso-2022-kr", &errorCode);
4181    if(U_FAILURE(errorCode)) {
4182        log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4183        return;
4184    }
4185    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4186    if(U_FAILURE(errorCode)) {
4187        log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4188        return;
4189    }
4190    kr1 = ucnv_open("ibm-25546", &errorCode);
4191    if(U_FAILURE(errorCode)) {
4192        log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4193        return;
4194    }
4195    target = tgt;
4196    targetLimit = target+100;
4197    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4198
4199    if(U_FAILURE(errorCode)) {
4200        log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4201        return;
4202    }
4203
4204    ucnv_close(kr);
4205    ucnv_close(kr1);
4206
4207}
4208
4209static void
4210TestJIS(){
4211    /* From Unicode moved to testdata/conversion.txt */
4212    /*To Unicode*/
4213    {
4214        static const uint8_t sampleTextJIS[] = {
4215            0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4216            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4217            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4218        };
4219        static const uint16_t expectedISO2022JIS[] = {
4220            0x0041, 0x0042,
4221            0xFF81, 0xFF82,
4222            0x3000
4223        };
4224        static const int32_t  toISO2022JISOffs[]={
4225            3,4,
4226            8,9,
4227            16
4228        };
4229
4230        static const uint8_t sampleTextJIS7[] = {
4231            0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4232            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4233            0x1b,0x24,0x42,0x21,0x21,
4234            0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4235            0x21,0x22,
4236            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4237        };
4238        static const uint16_t expectedISO2022JIS7[] = {
4239            0x0041, 0x0042,
4240            0xFF81, 0xFF82,
4241            0x3000,
4242            0xFF81, 0xFF82,
4243            0x3001,
4244            0x3000
4245        };
4246        static const int32_t  toISO2022JIS7Offs[]={
4247            3,4,
4248            8,9,
4249            13,16,
4250            17,
4251            19,27
4252        };
4253        static const uint8_t sampleTextJIS8[] = {
4254            0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4255            0xa1,0xc8,0xd9,/*Katakana Set*/
4256            0x1b,0x28,0x42,
4257            0x41,0x42,
4258            0xb1,0xc3, /*Katakana Set*/
4259            0x1b,0x24,0x42,0x21,0x21
4260        };
4261        static const uint16_t expectedISO2022JIS8[] = {
4262            0x0041, 0x0042,
4263            0xff61, 0xff88, 0xff99,
4264            0x0041, 0x0042,
4265            0xff71, 0xff83,
4266            0x3000
4267        };
4268        static const int32_t  toISO2022JIS8Offs[]={
4269            3, 4,  5,  6,
4270            7, 11, 12, 13,
4271            14, 18,
4272        };
4273
4274        testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4275            UPRV_LENGTHOF(expectedISO2022JIS),"JIS", toISO2022JISOffs,TRUE);
4276        testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4277            UPRV_LENGTHOF(expectedISO2022JIS7),"JIS7", toISO2022JIS7Offs,TRUE);
4278        testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4279            UPRV_LENGTHOF(expectedISO2022JIS8),"JIS8", toISO2022JIS8Offs,TRUE);
4280    }
4281
4282}
4283
4284
4285#if 0
4286 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4287
/*
 * Disabled (inside #if 0): round-trip check for ISO_2022_CN_EXT.
 * Converts the byte sequence in cSource to Unicode, converts the result
 * back to bytes with the same converter, and compares the output with the
 * original input byte by byte.
 *
 * NOTE(review): the early returns after a failed conversion leave `conv`
 * open; fix if this test is ever re-enabled.
 */
static void TestJitterbug915(){
/* tests for roundtripping of the below sequence
\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
\x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
\x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
\x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
\x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
\x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
\x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
*/
    static const char cSource[]={
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
        0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
        0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
        0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
        0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
        0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
        0x37, 0x20, 0x2A, 0x2F
    };
    /* Unicode scratch buffer; the limit is computed as sizeof/2 (assumes 2-byte UChar) */
    UChar uTarget[500]={'\0'};
    UChar* utarget=uTarget;
    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;

    /* byte scratch buffer that receives the re-encoded text */
    char cTarget[500]={'\0'};
    char* ctarget=cTarget;
    char* ctargetLimit=cTarget+sizeof(cTarget);
    const char* csource=cSource;
    const char* tempSrc = cSource;   /* cursor over the original bytes for the final comparison */
    UErrorCode err=U_ZERO_ERROR;

    UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
    if(U_FAILURE(err)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
        return;
    }
    /* bytes -> Unicode */
    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
    if(U_FAILURE(err)) {
        log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
        return;
    }
    /* the Unicode just written becomes the source: [uTarget, old utarget) */
    utargetLimit=utarget;
    utarget = uTarget;
    /* Unicode -> bytes */
    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
    if(U_FAILURE(err)) {
        log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
        return;
    }
    /* walk the re-encoded bytes and compare them with the original input */
    ctargetLimit=ctarget;
    ctarget =cTarget;
    while(ctarget<ctargetLimit){
        if(*ctarget != *tempSrc){
            /* NOTE(review): "Expected" is given the re-encoded byte and "Got" the original byte - looks swapped; confirm */
            log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
        }
        ++ctarget;
        ++tempSrc;
    }

    ucnv_close(conv);
}
4368
/*
 * Disabled (inside #if 0): round-trip test for the converter opened as
 * "ISO_2022,locale=cn,version=1".  Converts the UTF-16 sample from Unicode,
 * converts the bytes back, compares every code unit with the input, then
 * runs the small-buffer helpers and the invalid-character check.
 *
 * NOTE(review): every early return below skips the ucnv_close()/free()
 * calls at the end of the function; fix if this test is ever re-enabled.
 */
static void
TestISO_2022_CN_EXT() {
    /* test input */
    static const uint16_t in[]={
                /* test Non-BMP code points */
         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
         0xD869, 0xDED5,

         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
         0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
         0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
         0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
         0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
         0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,

         0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A

      };

    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf;
    UChar *uBuf,*test;
    int32_t uBufSize = 180;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;
    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
    int32_t* myOff= offsets;
    cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }

    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    /* cBuf is allocated with uBufSize*10 bytes, but only the first uBufSize*5 are offered as target below */
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    uSource = (const UChar*)in;
    uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;
    /* Unicode -> bytes */
    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        return;
    }
    /* bytes -> Unicode; cTarget now marks the end of the encoded bytes */
    cSource = cBuf;
    cSourceLimit =cTarget;
    test =uBuf;
    myOff=offsets;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        return;
    }
    /* compare the round-tripped text with the original, unit by unit */
    uSource = (const UChar*)in;
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
        }
        else{
            log_verbose("      Got: \\u%04X\n",(int)*test) ;
        }
        uSource++;
        test++;
    }
    TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
    TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
    /*Test for the condition where there is an invalid character*/
    ucnv_reset(cnv);
    {
        static const uint8_t source2[]={0x0e,0x24,0x053};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
    }
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
4475#endif
4476
4477static void
4478TestISO_2022_CN() {
4479    /* test input */
4480    static const uint16_t in[]={
4481         /* jitterbug 951 */
4482         0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4483         0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4484         0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4485         0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4486         0x0020, 0x0045, 0x004e, 0x0044,
4487         /**/
4488         0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4489         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4490         0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4491         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4492         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4493         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4494         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4495         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4496         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4497         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4498         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4499         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4500         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4501         0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4502         0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4503         0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4504         0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4505
4506      };
4507    const UChar* uSource;
4508    const UChar* uSourceLimit;
4509    const char* cSource;
4510    const char* cSourceLimit;
4511    UChar *uTargetLimit =NULL;
4512    UChar *uTarget;
4513    char *cTarget;
4514    const char *cTargetLimit;
4515    char *cBuf;
4516    UChar *uBuf,*test;
4517    int32_t uBufSize = 180;
4518    UErrorCode errorCode=U_ZERO_ERROR;
4519    UConverter *cnv;
4520    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4521    int32_t* myOff= offsets;
4522    cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4523    if(U_FAILURE(errorCode)) {
4524        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4525        return;
4526    }
4527
4528    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4529    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4530    uSource = (const UChar*)in;
4531    uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4532    cTarget = cBuf;
4533    cTargetLimit = cBuf +uBufSize*5;
4534    uTarget = uBuf;
4535    uTargetLimit = uBuf+ uBufSize*5;
4536    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4537    if(U_FAILURE(errorCode)){
4538        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4539        return;
4540    }
4541    cSource = cBuf;
4542    cSourceLimit =cTarget;
4543    test =uBuf;
4544    myOff=offsets;
4545    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4546    if(U_FAILURE(errorCode)){
4547        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4548        return;
4549    }
4550    uSource = (const UChar*)in;
4551    while(uSource<uSourceLimit){
4552        if(*test!=*uSource){
4553            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4554        }
4555        else{
4556            log_verbose("      Got: \\u%04X\n",(int)*test) ;
4557        }
4558        uSource++;
4559        test++;
4560    }
4561    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4562    TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4563    TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4564    TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4565    TestJitterbug930("csISO2022CN");
4566    /*Test for the condition where there is an invalid character*/
4567    ucnv_reset(cnv);
4568    {
4569        static const uint8_t source2[]={0x0e,0x24,0x053};
4570        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4571    }
4572
4573    ucnv_close(cnv);
4574    free(uBuf);
4575    free(cBuf);
4576    free(offsets);
4577}
4578
/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
/* One table entry for TestJitterbug6175; an entry with a NULL converterName ends the table. */
typedef struct {
    const char *    converterName;      /* name handed to ucnv_open() */
    const char *    inputText;          /* bytes fed to ucnv_toUnicode() */
    int             inputTextLength;    /* number of bytes in inputText */
} EmptySegmentTest;
4585
4586/* Callback for TestJitterbug6175, should only get called for empty segment errors */
4587static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4588                                             int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4589    if (reason > UCNV_IRREGULAR) {
4590        return;
4591    }
4592    if (reason != UCNV_IRREGULAR) {
4593        log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4594    }
4595    /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4596    *err = U_ZERO_ERROR;
4597    ucnv_cbToUWriteSub(toArgs,0,err);
4598}
4599
4600enum { kEmptySegmentToUCharsMax = 64 };
4601static void TestJitterbug6175(void) {
4602    static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4603    static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4604    static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4605    static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4606    static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4607    static const EmptySegmentTest emptySegmentTests[] = {
4608        /* converterName inputText    inputTextLength */
4609        { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4610        { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4611        { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4612        { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4613        { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
4614        /* terminator: */
4615        { NULL,          NULL,        0,                  }
4616    };
4617    const EmptySegmentTest * testPtr;
4618    for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4619        UErrorCode   err = U_ZERO_ERROR;
4620        UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4621        if (U_FAILURE(err)) {
4622            log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4623            return;
4624        }
4625        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4626        if (U_FAILURE(err)) {
4627            log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4628            ucnv_close(cnv);
4629            return;
4630        }
4631        {
4632            UChar         toUChars[kEmptySegmentToUCharsMax];
4633            UChar *       toUCharsPtr = toUChars;
4634            const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4635            const char *  inCharsPtr = testPtr->inputText;
4636            const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4637            ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4638        }
4639        ucnv_close(cnv);
4640    }
4641}
4642
4643static void
4644TestEBCDIC_STATEFUL() {
4645    /* test input */
4646    static const uint8_t in[]={
4647        0x61,
4648        0x1a,
4649        0x0f, 0x4b,
4650        0x42,
4651        0x40,
4652        0x36,
4653    };
4654
4655    /* expected test results */
4656    static const int32_t results[]={
4657        /* number of bytes read, code point */
4658        1, 0x002f,
4659        1, 0x0092,
4660        2, 0x002e,
4661        1, 0xff62,
4662        1, 0x0020,
4663        1, 0x0096,
4664
4665    };
4666    static const uint8_t in2[]={
4667        0x0f,
4668        0xa1,
4669        0x01
4670    };
4671
4672    /* expected test results */
4673    static const int32_t results2[]={
4674        /* number of bytes read, code point */
4675        2, 0x203E,
4676        1, 0x0001,
4677    };
4678
4679    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4680    UErrorCode errorCode=U_ZERO_ERROR;
4681    UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4682    if(U_FAILURE(errorCode)) {
4683        log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4684        return;
4685    }
4686    TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4687    ucnv_reset(cnv);
4688     /* Test the condition when source >= sourceLimit */
4689    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4690    ucnv_reset(cnv);
4691    /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4692    {
4693        static const uint8_t source1[]={0x0f};
4694        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4695    }
4696    /*Test for the condition where there is an invalid character*/
4697    ucnv_reset(cnv);
4698    {
4699        static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4700        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4701    }
4702    ucnv_reset(cnv);
4703    source=(const char*)in2;
4704    limit=(const char*)in2+sizeof(in2);
4705    TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4706    ucnv_close(cnv);
4707
4708}
4709
4710static void
4711TestGB18030() {
4712    /* test input */
4713    static const uint8_t in[]={
4714        0x24,
4715        0x7f,
4716        0x81, 0x30, 0x81, 0x30,
4717        0xa8, 0xbf,
4718        0xa2, 0xe3,
4719        0xd2, 0xbb,
4720        0x82, 0x35, 0x8f, 0x33,
4721        0x84, 0x31, 0xa4, 0x39,
4722        0x90, 0x30, 0x81, 0x30,
4723        0xe3, 0x32, 0x9a, 0x35
4724#if 0
4725        /*
4726         * Feature removed   markus 2000-oct-26
4727         * Only some codepages must match surrogate pairs into supplementary code points -
4728         * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4729         * GB 18030 provides direct encodings for supplementary code points, therefore
4730         * it must not combine two single-encoded surrogates into one code point.
4731         */
4732        0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4733#endif
4734    };
4735
4736    /* expected test results */
4737    static const int32_t results[]={
4738        /* number of bytes read, code point */
4739        1, 0x24,
4740        1, 0x7f,
4741        4, 0x80,
4742        2, 0x1f9,
4743        2, 0x20ac,
4744        2, 0x4e00,
4745        4, 0x9fa6,
4746        4, 0xffff,
4747        4, 0x10000,
4748        4, 0x10ffff
4749#if 0
4750        /* Feature removed. See comment above. */
4751        8, 0x10000
4752#endif
4753    };
4754
4755/*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4756    UErrorCode errorCode=U_ZERO_ERROR;
4757    UConverter *cnv=ucnv_open("gb18030", &errorCode);
4758    if(U_FAILURE(errorCode)) {
4759        log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4760        return;
4761    }
4762    TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4763    ucnv_close(cnv);
4764}
4765
4766static void
4767TestLMBCS() {
4768    /* LMBCS-1 string */
4769    static const uint8_t pszLMBCS[]={
4770        0x61,
4771        0x01, 0x29,
4772        0x81,
4773        0xA0,
4774        0x0F, 0x27,
4775        0x0F, 0x91,
4776        0x14, 0x0a, 0x74,
4777        0x14, 0xF6, 0x02,
4778        0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4779        0x10, 0x88, 0xA0,
4780    };
4781
4782    /* Unicode UChar32 equivalents */
4783    static const UChar32 pszUnicode32[]={
4784        /* code point */
4785        0x00000061,
4786        0x00002013,
4787        0x000000FC,
4788        0x000000E1,
4789        0x00000007,
4790        0x00000091,
4791        0x00000a74,
4792        0x00000200,
4793        0x00023456, /* code point for surrogate pair */
4794        0x00005516
4795    };
4796
4797/* Unicode UChar equivalents */
4798    static const UChar pszUnicode[]={
4799        /* code point */
4800        0x0061,
4801        0x2013,
4802        0x00FC,
4803        0x00E1,
4804        0x0007,
4805        0x0091,
4806        0x0a74,
4807        0x0200,
4808        0xD84D, /* low surrogate */
4809        0xDC56, /* high surrogate */
4810        0x5516
4811    };
4812
4813/* expected test results */
4814    static const int offsets32[]={
4815        /* number of bytes read, code point */
4816        0,
4817        1,
4818        3,
4819        4,
4820        5,
4821        7,
4822        9,
4823        12,
4824        15,
4825        21,
4826        24
4827    };
4828
4829/* expected test results */
4830    static const int offsets[]={
4831        /* number of bytes read, code point */
4832        0,
4833        1,
4834        3,
4835        4,
4836        5,
4837        7,
4838        9,
4839        12,
4840        15,
4841        18,
4842        21,
4843        24
4844    };
4845
4846
4847    UConverter *cnv;
4848
4849#define NAME_LMBCS_1 "LMBCS-1"
4850#define NAME_LMBCS_2 "LMBCS-2"
4851
4852
4853   /* Some basic open/close/property tests on some LMBCS converters */
4854    {
4855
4856      char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4857      char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4858      char get_subchars [1];
4859      const char * get_name;
4860      UConverter *cnv1;
4861      UConverter *cnv2;
4862
4863      int8_t len = sizeof(get_subchars);
4864
4865      UErrorCode errorCode=U_ZERO_ERROR;
4866
4867      /* Open */
4868      cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4869      if(U_FAILURE(errorCode)) {
4870         log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4871         return;
4872      }
4873      cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4874      if(U_FAILURE(errorCode)) {
4875         log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4876         return;
4877      }
4878
4879      /* Name */
4880      get_name = ucnv_getName (cnv1, &errorCode);
4881      if (strcmp(NAME_LMBCS_1,get_name)){
4882         log_err("Unexpected converter name: %s\n", get_name);
4883      }
4884      get_name = ucnv_getName (cnv2, &errorCode);
4885      if (strcmp(NAME_LMBCS_2,get_name)){
4886         log_err("Unexpected converter name: %s\n", get_name);
4887      }
4888
4889      /* substitution chars */
4890      ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4891      if(U_FAILURE(errorCode)) {
4892         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4893      }
4894      if (len!=1){
4895         log_err("Unexpected length of sub chars\n");
4896      }
4897      if (get_subchars[0] != expected_subchars[0]){
4898           log_err("Unexpected value of sub chars\n");
4899      }
4900      ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4901      if(U_FAILURE(errorCode)) {
4902         log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4903      }
4904      ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4905      if(U_FAILURE(errorCode)) {
4906         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4907      }
4908      if (len!=1){
4909         log_err("Unexpected length of sub chars\n");
4910      }
4911      if (get_subchars[0] != new_subchars[0]){
4912           log_err("Unexpected value of sub chars\n");
4913      }
4914      ucnv_close(cnv1);
4915      ucnv_close(cnv2);
4916
4917    }
4918
4919    /* LMBCS to Unicode - offsets */
4920    {
4921       UErrorCode errorCode=U_ZERO_ERROR;
4922
4923       const char * pSource = (const char *)pszLMBCS;
4924       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4925
4926       UChar Out [sizeof(pszUnicode) + 1];
4927       UChar * pOut = Out;
4928       UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
4929
4930       int32_t off [sizeof(offsets)];
4931
4932      /* last 'offset' in expected results is just the final size.
4933         (Makes other tests easier). Compensate here: */
4934
4935       off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
4936
4937
4938
4939      cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4940      if(U_FAILURE(errorCode)) {
4941           log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4942           return;
4943      }
4944
4945
4946
4947      ucnv_toUnicode (cnv,
4948                      &pOut,
4949                      OutLimit,
4950                      &pSource,
4951                      sourceLimit,
4952                      off,
4953                      TRUE,
4954                      &errorCode);
4955
4956
4957       if (memcmp(off,offsets,sizeof(offsets)))
4958       {
4959         log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4960       }
4961       if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4962       {
4963         log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4964       }
4965       ucnv_close(cnv);
4966    }
4967    {
4968   /* LMBCS to Unicode - getNextUChar */
4969      const char * sourceStart;
4970      const char *source=(const char *)pszLMBCS;
4971      const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4972      const UChar32 *results= pszUnicode32;
4973      const int *off = offsets32;
4974
4975      UErrorCode errorCode=U_ZERO_ERROR;
4976      UChar32 uniChar;
4977
4978      cnv=ucnv_open("LMBCS-1", &errorCode);
4979      if(U_FAILURE(errorCode)) {
4980           log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4981           return;
4982      }
4983      else
4984      {
4985
4986         while(source<limit) {
4987            sourceStart=source;
4988            uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4989            if(U_FAILURE(errorCode)) {
4990                  log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4991                  break;
4992            } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4993               log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4994                   uniChar, (source-sourceStart), *results, *off);
4995               break;
4996            }
4997            results++;
4998            off++;
4999         }
5000       }
5001       ucnv_close(cnv);
5002    }
5003    { /* test locale & optimization group operations: Unicode to LMBCS */
5004
5005      UErrorCode errorCode=U_ZERO_ERROR;
5006      UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
5007      UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
5008      UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
5009      UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5010      const UChar * pUniOut = uniString;
5011      UChar * pUniIn = uniString;
5012      uint8_t lmbcsString [4];
5013      const char * pLMBCSOut = (const char *)lmbcsString;
5014      char * pLMBCSIn = (char *)lmbcsString;
5015
5016      /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5017      ucnv_fromUnicode (cnv16he,
5018                        &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
5019                        &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5020                        NULL, 1, &errorCode);
5021
5022      if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5023      {
5024         log_err("LMBCS-16,locale=he gives unexpected translation\n");
5025      }
5026
5027      pLMBCSIn= (char *)lmbcsString;
5028      pUniOut = uniString;
5029      ucnv_fromUnicode (cnv01us,
5030                        &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
5031                        &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5032                        NULL, 1, &errorCode);
5033
5034      if (lmbcsString[0] != 0x9F)
5035      {
5036         log_err("LMBCS-1,locale=US gives unexpected translation\n");
5037      }
5038
5039      /* single byte char from mbcs char set */
5040      lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
5041      pLMBCSOut = (const char *)lmbcsString;
5042      pUniIn = uniString;
5043      ucnv_toUnicode (cnv16jp,
5044                        &pUniIn, pUniIn + 1,
5045                        &pLMBCSOut, (pLMBCSOut + 1),
5046                        NULL, 1, &errorCode);
5047      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5048      {
5049           log_err("Unexpected results from LMBCS-16 single byte char\n");
5050      }
5051      /* convert to group 1: should be 3 bytes */
5052      pLMBCSIn = (char *)lmbcsString;
5053      pUniOut = uniString;
5054      ucnv_fromUnicode (cnv01us,
5055                        &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5056                        &pUniOut, pUniOut + 1,
5057                        NULL, 1, &errorCode);
5058      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5059         || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5060      {
5061           log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5062      }
5063      pLMBCSOut = (const char *)lmbcsString;
5064      pUniIn = uniString;
5065      ucnv_toUnicode (cnv01us,
5066                        &pUniIn, pUniIn + 1,
5067                        &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5068                        NULL, 1, &errorCode);
5069      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5070      {
5071           log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5072      }
5073      pLMBCSIn = (char *)lmbcsString;
5074      pUniOut = uniString;
5075      ucnv_fromUnicode (cnv16jp,
5076                        &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5077                        &pUniOut, pUniOut + 1,
5078                        NULL, 1, &errorCode);
5079      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5080      {
5081           log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5082      }
5083      ucnv_close(cnv16he);
5084      ucnv_close(cnv16jp);
5085      ucnv_close(cnv01us);
5086    }
5087    {
5088       /* Small source buffer testing, LMBCS -> Unicode */
5089
5090       UErrorCode errorCode=U_ZERO_ERROR;
5091
5092       const char * pSource = (const char *)pszLMBCS;
5093       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5094       int codepointCount = 0;
5095
5096       UChar Out [sizeof(pszUnicode) + 1];
5097       UChar * pOut = Out;
5098       UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
5099
5100
5101       cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5102       if(U_FAILURE(errorCode)) {
5103           log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5104           return;
5105       }
5106
5107
5108       while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5109       {
5110           ucnv_toUnicode (cnv,
5111               &pOut,
5112               OutLimit,
5113               &pSource,
5114               (pSource+1), /* claim that this is a 1- byte buffer */
5115               NULL,
5116               FALSE,    /* FALSE means there might be more chars in the next buffer */
5117               &errorCode);
5118
5119           if (U_SUCCESS (errorCode))
5120           {
5121               if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5122               {
5123                   /* we are on to the next code point: check value */
5124
5125                   if (Out[0] != pszUnicode[codepointCount]){
5126                       log_err("LMBCS->Uni result %lx should have been %lx \n",
5127                           Out[0], pszUnicode[codepointCount]);
5128                   }
5129
5130                   pOut = Out; /* reset for accumulating next code point */
5131                   codepointCount++;
5132               }
5133           }
5134           else
5135           {
5136               log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5137           }
5138       }
5139       {
5140         /* limits & surrogate error testing */
5141         char LIn [sizeof(pszLMBCS)];
5142         const char * pLIn = LIn;
5143
5144         char LOut [sizeof(pszLMBCS)];
5145         char * pLOut = LOut;
5146
5147         UChar UOut [sizeof(pszUnicode)];
5148         UChar * pUOut = UOut;
5149
5150         UChar UIn [sizeof(pszUnicode)];
5151         const UChar * pUIn = UIn;
5152
5153         int32_t off [sizeof(offsets)];
5154         UChar32 uniChar;
5155
5156         errorCode=U_ZERO_ERROR;
5157
5158         /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5159         pUIn++;
5160         ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5161         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5162         {
5163            log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5164         }
5165         pUIn--;
5166
5167         errorCode=U_ZERO_ERROR;
5168         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5169         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5170         {
5171            log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5172         }
5173         errorCode=U_ZERO_ERROR;
5174
5175         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5176         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5177         {
5178            log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5179         }
5180         errorCode=U_ZERO_ERROR;
5181
5182         /* 0 byte source request - no error, no pointer movement */
5183         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5184         ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5185         if(U_FAILURE(errorCode)) {
5186            log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5187         }
5188         if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5189         {
5190              log_err("Unexpected pointer move in 0 byte source request \n");
5191         }
5192         /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5193         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5194         if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5195         {
5196            log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5197         }
5198         if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5199         {
5200            log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5201         }
5202         errorCode = U_ZERO_ERROR;
5203
5204         /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5205
5206         pUIn = pszUnicode;
5207         ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,FALSE, &errorCode);
5208         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5209         {
5210            log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5211         }
5212
5213         errorCode = U_ZERO_ERROR;
5214
5215         pLIn = (const char *)pszLMBCS;
5216         ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5217         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5218         {
5219            log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5220         }
5221
5222         /* unpaired or chopped LMBCS surrogates */
5223
5224         /* OK high surrogate, Low surrogate is chopped */
5225         LIn [0] = (char)0x14;
5226         LIn [1] = (char)0xD8;
5227         LIn [2] = (char)0x01;
5228         LIn [3] = (char)0x14;
5229         LIn [4] = (char)0xDC;
5230         pLIn = LIn;
5231         errorCode = U_ZERO_ERROR;
5232         pUOut = UOut;
5233
5234         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5235         ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5236         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5237         {
5238            log_err("Unexpected results on chopped low surrogate\n");
5239         }
5240
5241         /* chopped at surrogate boundary */
5242         LIn [0] = (char)0x14;
5243         LIn [1] = (char)0xD8;
5244         LIn [2] = (char)0x01;
5245         pLIn = LIn;
5246         errorCode = U_ZERO_ERROR;
5247         pUOut = UOut;
5248
5249         ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5250         if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5251         {
5252            log_err("Unexpected results on chopped at surrogate boundary \n");
5253         }
5254
5255         /* unpaired surrogate plus valid Unichar */
5256         LIn [0] = (char)0x14;
5257         LIn [1] = (char)0xD8;
5258         LIn [2] = (char)0x01;
5259         LIn [3] = (char)0x14;
5260         LIn [4] = (char)0xC9;
5261         LIn [5] = (char)0xD0;
5262         pLIn = LIn;
5263         errorCode = U_ZERO_ERROR;
5264         pUOut = UOut;
5265
5266         ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5267         if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5268         {
5269            log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5270         }
5271
5272      /* unpaired surrogate plus chopped Unichar */
5273         LIn [0] = (char)0x14;
5274         LIn [1] = (char)0xD8;
5275         LIn [2] = (char)0x01;
5276         LIn [3] = (char)0x14;
5277         LIn [4] = (char)0xC9;
5278
5279         pLIn = LIn;
5280         errorCode = U_ZERO_ERROR;
5281         pUOut = UOut;
5282
5283         ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5284         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5285         {
5286            log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5287         }
5288
5289         /* unpaired surrogate plus valid non-Unichar */
5290         LIn [0] = (char)0x14;
5291         LIn [1] = (char)0xD8;
5292         LIn [2] = (char)0x01;
5293         LIn [3] = (char)0x0F;
5294         LIn [4] = (char)0x3B;
5295
5296         pLIn = LIn;
5297         errorCode = U_ZERO_ERROR;
5298         pUOut = UOut;
5299
5300         ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5301         if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5302         {
5303            log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5304         }
5305
5306         /* unpaired surrogate plus chopped non-Unichar */
5307         LIn [0] = (char)0x14;
5308         LIn [1] = (char)0xD8;
5309         LIn [2] = (char)0x01;
5310         LIn [3] = (char)0x0F;
5311
5312         pLIn = LIn;
5313         errorCode = U_ZERO_ERROR;
5314         pUOut = UOut;
5315
5316         ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5317
5318         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5319         {
5320            log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5321         }
5322       }
5323    }
5324   ucnv_close(cnv);  /* final cleanup */
5325}
5326
5327
5328static void TestJitterbug255()
5329{
5330    static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5331    const char *testBuffer = (const char *)testBytes;
5332    const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5333    UErrorCode status = U_ZERO_ERROR;
5334    /*UChar32 result;*/
5335    UConverter *cnv = 0;
5336
5337    cnv = ucnv_open("shift-jis", &status);
5338    if (U_FAILURE(status) || cnv == 0) {
5339        log_data_err("Failed to open the converter for SJIS.\n");
5340                return;
5341    }
5342    while (testBuffer != testEnd)
5343    {
5344        /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5345        if (U_FAILURE(status))
5346        {
5347            log_err("Failed to convert the next UChar for SJIS.\n");
5348            break;
5349        }
5350    }
5351    ucnv_close(cnv);
5352}
5353
5354static void TestEBCDICUS4XML()
5355{
5356    UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5357    static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5358    static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5359    static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5360    char target_x[] = {0x00, 0x00, 0x00, 0x00};
5361    UChar *unicodes = unicodes_x;
5362    const UChar *toUnicodeMaps = toUnicodeMaps_x;
5363    char *target = target_x;
5364    const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5365    UErrorCode status = U_ZERO_ERROR;
5366    UConverter *cnv = 0;
5367
5368    cnv = ucnv_open("ebcdic-xml-us", &status);
5369    if (U_FAILURE(status) || cnv == 0) {
5370        log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5371        return;
5372    }
5373    ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5374    if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5375        log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5376            u_errorName(status));
5377        printUSeqErr(unicodes_x, 3);
5378        printUSeqErr(toUnicodeMaps, 3);
5379    }
5380    status = U_ZERO_ERROR;
5381    ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5382    if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5383        log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5384            u_errorName(status));
5385        printSeqErr((const unsigned char*)target_x, 3);
5386        printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5387    }
5388    ucnv_close(cnv);
5389}
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5391
5392#if !UCONFIG_NO_COLLATION
5393
5394static void TestJitterbug981(){
5395    const UChar* rules;
5396    int32_t rules_length, target_cap, bytes_needed, buff_size;
5397    UErrorCode status = U_ZERO_ERROR;
5398    UConverter *utf8cnv;
5399    UCollator* myCollator;
5400    char *buff;
5401    int numNeeded=0;
5402    utf8cnv = ucnv_open ("utf8", &status);
5403    if(U_FAILURE(status)){
5404        log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5405        return;
5406    }
5407    myCollator = ucol_open("zh", &status);
5408    if(U_FAILURE(status)){
5409        log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5410        ucnv_close(utf8cnv);
5411        return;
5412    }
5413
5414    rules = ucol_getRules(myCollator, &rules_length);
5415    if(rules_length == 0) {
5416        log_data_err("missing zh tailoring rule string\n");
5417        ucol_close(myCollator);
5418        ucnv_close(utf8cnv);
5419        return;
5420    }
5421    buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5422    buff = malloc(buff_size);
5423
5424    target_cap = 0;
5425    do {
5426        ucnv_reset(utf8cnv);
5427        status = U_ZERO_ERROR;
5428        if(target_cap >= buff_size) {
5429            log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5430            break;
5431        }
5432        bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5433            rules, rules_length, &status);
5434        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5435        if(numNeeded!=0 && numNeeded!= bytes_needed){
5436            log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5437            break;
5438        }
5439        numNeeded = bytes_needed;
5440    } while (status == U_BUFFER_OVERFLOW_ERROR);
5441    ucol_close(myCollator);
5442    ucnv_close(utf8cnv);
5443    free(buff);
5444}
5445
5446#endif
5447
5448#if !UCONFIG_NO_LEGACY_CONVERSION
5449static void TestJitterbug1293(){
5450    static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5451    char target[256];
5452    UErrorCode status = U_ZERO_ERROR;
5453    UConverter* conv=NULL;
5454    int32_t target_cap, bytes_needed, numNeeded = 0;
5455    conv = ucnv_open("shift-jis",&status);
5456    if(U_FAILURE(status)){
5457      log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5458      return;
5459    }
5460
5461    do{
5462        target_cap =0;
5463        bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5464        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5465        if(numNeeded!=0 && numNeeded!= bytes_needed){
5466          log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5467        }
5468        numNeeded = bytes_needed;
5469    } while (status == U_BUFFER_OVERFLOW_ERROR);
5470    if(U_FAILURE(status)){
5471      log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5472      return;
5473    }
5474    ucnv_close(conv);
5475}
5476#endif
5477
5478static void TestJB5275_1(){
5479
5480    static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5481                                "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5482                                /* Switch script: */
5483                                "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5484                                "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5485                                "\xEF\x40\x3B\xB3\x0A";
5486    static const UChar expected[] ={
5487            0x003b, 0x0a15, 0x000a, /* Easy characters */
5488            0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5489            0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5490            0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5491            0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5492    };
5493
5494    UErrorCode status = U_ZERO_ERROR;
5495    UConverter* conv = ucnv_open("iscii-gur", &status);
5496    UChar dest[100] = {'\0'};
5497    UChar* target = dest;
5498    UChar* targetLimit = dest+100;
5499    const char* source = data;
5500    const char* sourceLimit = data+strlen(data);
5501    const UChar* exp = expected;
5502
5503    if (U_FAILURE(status)) {
5504        log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5505        return;
5506    }
5507
5508    log_verbose("Testing switching back to default script when new line is encountered.\n");
5509    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5510    if(U_FAILURE(status)){
5511        log_err("conversion failed: %s \n", u_errorName(status));
5512    }
5513    targetLimit = target;
5514    target = dest;
5515    printUSeq(target, targetLimit-target);
5516    while(target<targetLimit){
5517        if(*exp!=*target){
5518            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5519        }
5520        target++;
5521        exp++;
5522    }
5523    ucnv_close(conv);
5524}
5525
5526static void TestJB5275(){
5527    static const char* data =
5528    /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A"  unsupported sequence \xEF\x41 */
5529    /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A"  unsupported sequence \xEF\x41  */
5530    /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A"  unsupported sequence \xEF\x41 */
5531        "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A"  /* Gurmukhi test */
5532        "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A"  /* Gujarati test */
5533        "\xEF\x48\x38\xB3\x0A"  /* Kannada test */
5534        "\xEF\x49\x39\xB3\x0A"  /* Malayalam test */
5535        "\xEF\x4A\x3A\xB3\x0A"  /* Gujarati test */
5536        "\xEF\x4B\x3B\xB3\x0A"  /* Punjabi test */
5537        /* "\xEF\x4C\x3C\xB3\x0A"  unsupported sequence \xEF\x41 */;
5538    static const UChar expected[] ={
5539        0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5540        0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A,     /* Gujarati test */
5541        0x0038, 0x0C95, 0x000A, /* Kannada test */
5542        0x0039, 0x0D15, 0x000A, /* Malayalam test */
5543        0x003A, 0x0A95, 0x000A, /* Gujarati test */
5544        0x003B, 0x0A15, 0x000A, /* Punjabi test */
5545    };
5546
5547    UErrorCode status = U_ZERO_ERROR;
5548    UConverter* conv = ucnv_open("iscii", &status);
5549    UChar dest[100] = {'\0'};
5550    UChar* target = dest;
5551    UChar* targetLimit = dest+100;
5552    const char* source = data;
5553    const char* sourceLimit = data+strlen(data);
5554    const UChar* exp = expected;
5555    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5556    if(U_FAILURE(status)){
5557        log_data_err("conversion failed: %s \n", u_errorName(status));
5558    }
5559    targetLimit = target;
5560    target = dest;
5561
5562    printUSeq(target, targetLimit-target);
5563
5564    while(target<targetLimit){
5565        if(*exp!=*target){
5566            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5567        }
5568        target++;
5569        exp++;
5570    }
5571    ucnv_close(conv);
5572}
5573
5574static void
5575TestIsFixedWidth() {
5576    UErrorCode status = U_ZERO_ERROR;
5577    UConverter *cnv = NULL;
5578    int32_t i;
5579
5580    const char *fixedWidth[] = {
5581            "US-ASCII",
5582            "UTF32",
5583            "ibm-5478_P100-1995"
5584    };
5585
5586    const char *notFixedWidth[] = {
5587            "GB18030",
5588            "UTF8",
5589            "windows-949-2000",
5590            "UTF16"
5591    };
5592
5593    for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
5594        cnv = ucnv_open(fixedWidth[i], &status);
5595        if (cnv == NULL || U_FAILURE(status)) {
5596            log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5597            continue;
5598        }
5599
5600        if (!ucnv_isFixedWidth(cnv, &status)) {
5601            log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
5602        }
5603        ucnv_close(cnv);
5604    }
5605
5606    for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
5607        cnv = ucnv_open(notFixedWidth[i], &status);
5608        if (cnv == NULL || U_FAILURE(status)) {
5609            log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5610            continue;
5611        }
5612
5613        if (ucnv_isFixedWidth(cnv, &status)) {
5614            log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
5615        }
5616        ucnv_close(cnv);
5617    }
5618}
5619