1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*******************************************************************************
7*
8* File CCONVTST.C
9*
10* Modification History:
11*        Name                     Description
12*    Steven R. Loomis     7/8/1999      Adding input buffer test
13********************************************************************************
14*/
15#include <stdio.h>
16#include "cstring.h"
17#include "unicode/uloc.h"
18#include "unicode/ucnv.h"
19#include "unicode/ucnv_err.h"
20#include "unicode/ucnv_cb.h"
21#include "cintltst.h"
22#include "unicode/utypes.h"
23#include "unicode/ustring.h"
24#include "unicode/ucol.h"
25#include "unicode/utf16.h"
26#include "cmemory.h"
27#include "nucnvtst.h"
28
/* Element count of a true array; not valid for pointers or array parameters. */
#define LENGTHOF(array) (sizeof(array) / sizeof(*(array)))
30
31static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
32static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
33#if !UCONFIG_NO_COLLATION
34static void TestJitterbug981(void);
35#endif
36#if !UCONFIG_NO_LEGACY_CONVERSION
37static void TestJitterbug1293(void);
38#endif
39static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
40static void TestConverterTypesAndStarters(void);
41static void TestAmbiguous(void);
42static void TestSignatureDetection(void);
43static void TestUTF7(void);
44static void TestIMAP(void);
45static void TestUTF8(void);
46static void TestCESU8(void);
47static void TestUTF16(void);
48static void TestUTF16BE(void);
49static void TestUTF16LE(void);
50static void TestUTF32(void);
51static void TestUTF32BE(void);
52static void TestUTF32LE(void);
53static void TestLATIN1(void);
54
55#if !UCONFIG_NO_LEGACY_CONVERSION
56static void TestSBCS(void);
57static void TestDBCS(void);
58static void TestMBCS(void);
59#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
60static void TestICCRunout(void);
61#endif
62
63#ifdef U_ENABLE_GENERIC_ISO_2022
64static void TestISO_2022(void);
65#endif
66
67static void TestISO_2022_JP(void);
68static void TestISO_2022_JP_1(void);
69static void TestISO_2022_JP_2(void);
70static void TestISO_2022_KR(void);
71static void TestISO_2022_KR_1(void);
72static void TestISO_2022_CN(void);
73#if 0
74   /*
75    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
76    */
77static void TestISO_2022_CN_EXT(void);
78#endif
79static void TestJIS(void);
80static void TestHZ(void);
81#endif
82
83static void TestSCSU(void);
84
85#if !UCONFIG_NO_LEGACY_CONVERSION
86static void TestEBCDIC_STATEFUL(void);
87static void TestGB18030(void);
88static void TestLMBCS(void);
89static void TestJitterbug255(void);
90static void TestEBCDICUS4XML(void);
91#if 0
92   /*
93    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
94    */
95static void TestJitterbug915(void);
96#endif
97static void TestISCII(void);
98
99static void TestCoverageMBCS(void);
100static void TestJitterbug2346(void);
101static void TestJitterbug2411(void);
102static void TestJB5275(void);
103static void TestJB5275_1(void);
104static void TestJitterbug6175(void);
105
106static void TestIsFixedWidth(void);
107#endif
108
109static void TestInBufSizes(void);
110
111static void TestRoundTrippingAllUTF(void);
112static void TestConv(const uint16_t in[],
113                     int len,
114                     const char* conv,
115                     const char* lang,
116                     char byteArr[],
117                     int byteArrLen);
118
119/* open a converter, using test data if it begins with '@' */
120static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
121
122
/* Element count of the scratch buffers used by the conversion helpers, and
   the default value of the two chunk sizes below. */
#define NEW_MAX_BUFFER 999

/* Chunk sizes (input units / output units per converter call) used by
   testConvertFromU() and testConvertToU(). File-scope state that is set by
   TestNewConvertWithBufferSizes() before it runs its conversions. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Description of the conversion under test; filled by setNuConvTestName(). */
static char     gNuConvTestName[1024];

/* Smaller of x and y. Every argument use and the whole expansion are
   parenthesized so operator expressions can be passed safely. The selected
   argument is evaluated twice: do not pass expressions with side effects. */
#define nct_min(x,y)  (((x) < (y)) ? (x) : (y))
130
131static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
132{
133  if(cnv && cnv[0] == '@') {
134    return ucnv_openPackage(loadTestData(err), cnv+1, err);
135  } else {
136    return ucnv_open(cnv, err);
137  }
138}
139
/* Writes the first len bytes of a to the verbose log as "{0x.. 0x.. }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
148
149static void printUSeq(const UChar* a, int len)
150{
151    int i=0;
152    log_verbose("{U+");
153    while (i<len) log_verbose("0x%04x ", a[i++]);
154    log_verbose("}\n");
155}
156
/* Writes the first len bytes of a to stderr as "{0x.. 0x.. }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
165
166static void printUSeqErr(const UChar* a, int len)
167{
168    int i=0;
169    fprintf(stderr, "{U+");
170    while (i<len)
171        fprintf(stderr, "0x%04x ", a[i++]);
172    fprintf(stderr,"}\n");
173}
174
175static void
176TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
177{
178     const char* s0;
179     const char* s=(char*)source;
180     const int32_t *r=results;
181     UErrorCode errorCode=U_ZERO_ERROR;
182     UChar32 c;
183
184     while(s<limit) {
185        s0=s;
186        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
187        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
188            break; /* no more significant input */
189        } else if(U_FAILURE(errorCode)) {
190            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
191            break;
192        } else if(
193            /* test the expected number of input bytes only if >=0 */
194            (*r>=0 && (int32_t)(s-s0)!=*r) ||
195            c!=*(r+1)
196        ) {
197            log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198                message, c, (s-s0), *(r+1), *r);
199            break;
200        }
201        r+=2;
202    }
203}
204
205static void
206TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
207{
208     const char* s=(char*)source;
209     UErrorCode errorCode=U_ZERO_ERROR;
210     uint32_t c;
211     c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
212     if(errorCode != expected){
213        log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
214     }
215     if(c != 0xFFFD && c != 0xffff){
216        log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
217     }
218
219}
220
221static void TestInBufSizes(void)
222{
223  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
224#if 1
225  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
226  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
227  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
228  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
229  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
230  TestNewConvertWithBufferSizes(1,1);
231  TestNewConvertWithBufferSizes(2,3);
232  TestNewConvertWithBufferSizes(3,2);
233#endif
234}
235
236static void TestOutBufSizes(void)
237{
238#if 1
239  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
240  TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
241  TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
242  TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
243  TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
244  TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
245
246#endif
247}
248
249
250void addTestNewConvert(TestNode** root)
251{
252#if !UCONFIG_NO_FILE_IO
253   addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
254   addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
255#endif
256   addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
257   addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
258   addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
259   addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
260   addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
261   addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
262
263   /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
264   addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
265   addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
266   addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
267   addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
268   addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
269   addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
270   addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
271
272#if !UCONFIG_NO_LEGACY_CONVERSION
273   addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
274#endif
275
276   addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
277
278#if !UCONFIG_NO_LEGACY_CONVERSION
279   addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
280#if !UCONFIG_NO_FILE_IO
281   addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
282   addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
283#endif
284   addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
285
286#ifdef U_ENABLE_GENERIC_ISO_2022
287   addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
288#endif
289
290   addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
291   addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
292   addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
293   // android-changed (no have ISO_2022_JP_2) -- addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
294   addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
295   addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
296   // android-changed (no ISO-2022-CN) -- addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
297   /*
298    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
299   addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
300   addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
301    */
302   addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
303#endif
304
305   addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
306
307#if !UCONFIG_NO_LEGACY_CONVERSION
308   addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
309   addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
310   addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
311   addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
312   addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
313   addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
314   addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
315#if !UCONFIG_NO_COLLATION
316   // android-removed (no collation tailoring rules)  -- addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
317#endif
318
319   addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
320#endif
321
322
323#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
324   addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
325#endif
326
327   addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
328
329#if !UCONFIG_NO_LEGACY_CONVERSION
330   addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
331   addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
332   // android-removed (no full ISO2022 CJK tables)  -- addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
333   addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
334#endif
335}
336
337
338/* Note that this test already makes use of statics, so it's not really
339   multithread safe.
340   This convenience function lets us make the error messages actually useful.
341*/
342
343static void setNuConvTestName(const char *codepage, const char *direction)
344{
345    sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
346        codepage,
347        direction,
348        (int)gInBufferSize,
349        (int)gOutBufferSize);
350}
351
/* Outcome reported by testConvertFromU() and testConvertToU(). */
typedef enum
{
  TC_OK,        /* 0: test was OK */
  TC_MISMATCH,  /* 1: match failed - err was printed */
  TC_FAIL       /* 2: test failed, don't print an err because it was already printed */
} ETestConvertResult;
358
359/* Note: This function uses global variables and it will not do offset
360checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
361static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
362                const char *codepage, const int32_t *expectOffsets , UBool useFallback)
363{
364    UErrorCode status = U_ZERO_ERROR;
365    UConverter *conv = 0;
366    char    junkout[NEW_MAX_BUFFER]; /* FIX */
367    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
368    char *p;
369    const UChar *src;
370    char *end;
371    char *targ;
372    int32_t *offs;
373    int i;
374    int32_t   realBufferSize;
375    char *realBufferEnd;
376    const UChar *realSourceEnd;
377    const UChar *sourceLimit;
378    UBool checkOffsets = TRUE;
379    UBool doFlush;
380
381    for(i=0;i<NEW_MAX_BUFFER;i++)
382        junkout[i] = (char)0xF0;
383    for(i=0;i<NEW_MAX_BUFFER;i++)
384        junokout[i] = 0xFF;
385
386    setNuConvTestName(codepage, "FROM");
387
388    log_verbose("\n=========  %s\n", gNuConvTestName);
389
390    conv = my_ucnv_open(codepage, &status);
391
392    if(U_FAILURE(status))
393    {
394        log_data_err("Couldn't open converter %s\n",codepage);
395        return TC_FAIL;
396    }
397    if(useFallback){
398        ucnv_setFallback(conv,useFallback);
399    }
400
401    log_verbose("Converter opened..\n");
402
403    src = source;
404    targ = junkout;
405    offs = junokout;
406
407    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
408    realBufferEnd = junkout + realBufferSize;
409    realSourceEnd = source + sourceLen;
410
411    if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
412        checkOffsets = FALSE;
413
414    do
415    {
416      end = nct_min(targ + gOutBufferSize, realBufferEnd);
417      sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
418
419      doFlush = (UBool)(sourceLimit == realSourceEnd);
420
421      if(targ == realBufferEnd) {
422        log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
423        return TC_FAIL;
424      }
425      log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
426
427
428      status = U_ZERO_ERROR;
429
430      ucnv_fromUnicode (conv,
431                        &targ,
432                        end,
433                        &src,
434                        sourceLimit,
435                        checkOffsets ? offs : NULL,
436                        doFlush, /* flush if we're at the end of the input data */
437                        &status);
438    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
439
440    if(U_FAILURE(status)) {
441      log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
442      return TC_FAIL;
443    }
444
445    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
446                sourceLen, targ-junkout);
447
448    if(getTestOption(VERBOSITY_OPTION))
449    {
450      char junk[9999];
451      char offset_str[9999];
452      char *ptr;
453
454      junk[0] = 0;
455      offset_str[0] = 0;
456      for(ptr = junkout;ptr<targ;ptr++) {
457        sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
458        sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
459      }
460
461      log_verbose(junk);
462      printSeq((const uint8_t *)expect, expectLen);
463      if ( checkOffsets ) {
464        log_verbose("\nOffsets:");
465        log_verbose(offset_str);
466      }
467      log_verbose("\n");
468    }
469    ucnv_close(conv);
470
471    if(expectLen != targ-junkout) {
472      log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
473      log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
474      fprintf(stderr, "Got:\n");
475      printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
476      fprintf(stderr, "Expected:\n");
477      printSeqErr((const unsigned char*)expect, expectLen);
478      return TC_MISMATCH;
479    }
480
481    if (checkOffsets && (expectOffsets != 0) ) {
482      log_verbose("comparing %d offsets..\n", targ-junkout);
483      if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
484        log_err("did not get the expected offsets. %s\n", gNuConvTestName);
485        printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
486        log_err("\n");
487        log_err("Got  :     ");
488        for(p=junkout;p<targ;p++) {
489          log_err("%d,", junokout[p-junkout]);
490        }
491        log_err("\n");
492        log_err("Expected:  ");
493        for(i=0; i<(targ-junkout); i++) {
494          log_err("%d,", expectOffsets[i]);
495        }
496        log_err("\n");
497      }
498    }
499
500    log_verbose("comparing..\n");
501    if(!memcmp(junkout, expect, expectLen)) {
502      log_verbose("Matches!\n");
503      return TC_OK;
504    } else {
505      log_err("String does not match u->%s\n", gNuConvTestName);
506      printUSeqErr(source, sourceLen);
507      fprintf(stderr, "Got:\n");
508      printSeqErr((const unsigned char *)junkout, expectLen);
509      fprintf(stderr, "Expected:\n");
510      printSeqErr((const unsigned char *)expect, expectLen);
511
512      return TC_MISMATCH;
513    }
514}
515
516/* Note: This function uses global variables and it will not do offset
517checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
518static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
519                                          const char *codepage, const int32_t *expectOffsets, UBool useFallback)
520{
521    UErrorCode status = U_ZERO_ERROR;
522    UConverter *conv = 0;
523    UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
524    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
525    const char *src;
526    const char *realSourceEnd;
527    const char *srcLimit;
528    UChar *p;
529    UChar *targ;
530    UChar *end;
531    int32_t *offs;
532    int i;
533    UBool   checkOffsets = TRUE;
534
535    int32_t   realBufferSize;
536    UChar *realBufferEnd;
537
538
539    for(i=0;i<NEW_MAX_BUFFER;i++)
540        junkout[i] = 0xFFFE;
541
542    for(i=0;i<NEW_MAX_BUFFER;i++)
543        junokout[i] = -1;
544
545    setNuConvTestName(codepage, "TO");
546
547    log_verbose("\n=========  %s\n", gNuConvTestName);
548
549    conv = my_ucnv_open(codepage, &status);
550
551    if(U_FAILURE(status))
552    {
553        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
554        return TC_FAIL;
555    }
556    if(useFallback){
557        ucnv_setFallback(conv,useFallback);
558    }
559    log_verbose("Converter opened..\n");
560
561    src = (const char *)source;
562    targ = junkout;
563    offs = junokout;
564
565    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
566    realBufferEnd = junkout + realBufferSize;
567    realSourceEnd = src + sourcelen;
568
569    if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
570        checkOffsets = FALSE;
571
572    do
573    {
574        end = nct_min( targ + gOutBufferSize, realBufferEnd);
575        srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
576
577        if(targ == realBufferEnd)
578        {
579            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
580            return TC_FAIL;
581        }
582        log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
583
584        /* oldTarg = targ; */
585
586        status = U_ZERO_ERROR;
587
588        ucnv_toUnicode (conv,
589                &targ,
590                end,
591                &src,
592                srcLimit,
593                checkOffsets ? offs : NULL,
594                (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
595                &status);
596
597        /*        offs += (targ-oldTarg); */
598
599      } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
600
601    if(U_FAILURE(status))
602    {
603        log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
604        return TC_FAIL;
605    }
606
607    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
608        sourcelen, targ-junkout);
609    if(getTestOption(VERBOSITY_OPTION))
610    {
611        char junk[9999];
612        char offset_str[9999];
613        UChar *ptr;
614
615        junk[0] = 0;
616        offset_str[0] = 0;
617
618        for(ptr = junkout;ptr<targ;ptr++)
619        {
620            sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
621            sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
622        }
623
624        log_verbose(junk);
625        printUSeq(expect, expectlen);
626        if ( checkOffsets )
627          {
628            log_verbose("\nOffsets:");
629            log_verbose(offset_str);
630          }
631        log_verbose("\n");
632    }
633    ucnv_close(conv);
634
635    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
636
637    if (checkOffsets && (expectOffsets != 0))
638    {
639        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
640            log_err("did not get the expected offsets. %s\n",gNuConvTestName);
641            log_err("Got:      ");
642            for(p=junkout;p<targ;p++) {
643                log_err("%d,", junokout[p-junkout]);
644            }
645            log_err("\n");
646            log_err("Expected: ");
647            for(i=0; i<(targ-junkout); i++) {
648                log_err("%d,", expectOffsets[i]);
649            }
650            log_err("\n");
651            log_err("output:   ");
652            for(i=0; i<(targ-junkout); i++) {
653                log_err("%X,", junkout[i]);
654            }
655            log_err("\n");
656            log_err("input:    ");
657            for(i=0; i<(src-(const char *)source); i++) {
658                log_err("%X,", (unsigned char)source[i]);
659            }
660            log_err("\n");
661        }
662    }
663
664    if(!memcmp(junkout, expect, expectlen*2))
665    {
666        log_verbose("Matches!\n");
667        return TC_OK;
668    }
669    else
670    {
671        log_err("String does not match. %s\n", gNuConvTestName);
672        log_verbose("String does not match. %s\n", gNuConvTestName);
673        printf("\nGot:");
674        printUSeqErr(junkout, expectlen);
675        printf("\nExpected:");
676        printUSeqErr(expect, expectlen);
677        return TC_MISMATCH;
678    }
679}
680
681
682static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
683{
684/** test chars #1 */
685    /*  1 2 3  1Han 2Han 3Han .  */
686    static const UChar   sampleText[] =
687     { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
688    static const UChar sampleTextRoundTripUnmappable[] =
689    { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
690
691
692    static const uint8_t expectedUTF8[] =
693     { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
694    static const int32_t toUTF8Offs[] =
695     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
696    static const int32_t fmUTF8Offs[] =
697     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
698
699#ifdef U_ENABLE_GENERIC_ISO_2022
    /* Same as UTF8, but with ^[%B preceding */
701    static const const uint8_t expectedISO2022[] =
702     { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
703    static const int32_t toISO2022Offs[]     =
704     { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
705       0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
706    static const int32_t fmISO2022Offs[] =
707     { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
708#endif
709
710    /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
711    static const uint8_t expectedIBM930[] =
712     { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
713    static const int32_t toIBM930Offs[] =
714     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
715    static const int32_t fmIBM930Offs[] =
716     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
717
718    /* 1 2 3 0 h1 h2 h3 . MBCS*/
719    static const uint8_t expectedIBM943[] =
720     {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
721    static const int32_t toIBM943Offs    [] =
722     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
723    static const int32_t fmIBM943Offs[] =
724     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
725
726    /* 1 2 3 0 h1 h2 h3 . DBCS*/
727    static const uint8_t expectedIBM9027[] =
728     {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
729    static const int32_t toIBM9027Offs    [] =
730     {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
731
732     /* 1 2 3 0 <?> <?> <?> . SBCS*/
733    static const uint8_t expectedIBM920[] =
734     {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
735    static const int32_t toIBM920Offs    [] =
736     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
737
738    /* 1 2 3 0 <?> <?> <?> . SBCS*/
739    static const uint8_t expectedISO88593[] =
740     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
741    static const int32_t toISO88593Offs[]     =
742     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
743
744    /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
745    static const uint8_t expectedLATIN1[] =
746     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
747    static const int32_t toLATIN1Offs[]     =
748     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
749
750
751    /*  etc */
752    static const uint8_t expectedUTF16BE[] =
753     { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
754    static const int32_t toUTF16BEOffs[]=
755     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
756    static const int32_t fmUTF16BEOffs[] =
757     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e, 0x0010, 0x0010 };
758
759    static const uint8_t expectedUTF16LE[] =
760     { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
761    static const int32_t toUTF16LEOffs[]=
762     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
763    static const int32_t fmUTF16LEOffs[] =
764     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
765
766    static const uint8_t expectedUTF32BE[] =
767     { 0x00, 0x00, 0x00, 0x31,
768       0x00, 0x00, 0x00, 0x32,
769       0x00, 0x00, 0x00, 0x33,
770       0x00, 0x00, 0x00, 0x00,
771       0x00, 0x00, 0x4e, 0x00,
772       0x00, 0x00, 0x4e, 0x8c,
773       0x00, 0x00, 0x4e, 0x09,
774       0x00, 0x00, 0x00, 0x2e,
775       0x00, 0x02, 0x00, 0x21 };
776    static const int32_t toUTF32BEOffs[]=
777     { 0x00, 0x00, 0x00, 0x00,
778       0x01, 0x01, 0x01, 0x01,
779       0x02, 0x02, 0x02, 0x02,
780       0x03, 0x03, 0x03, 0x03,
781       0x04, 0x04, 0x04, 0x04,
782       0x05, 0x05, 0x05, 0x05,
783       0x06, 0x06, 0x06, 0x06,
784       0x07, 0x07, 0x07, 0x07,
785       0x08, 0x08, 0x08, 0x08,
786       0x08, 0x08, 0x08, 0x08 };
787    static const int32_t fmUTF32BEOffs[] =
788     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c, 0x0020, 0x0020 };
789
790    static const uint8_t expectedUTF32LE[] =
791     { 0x31, 0x00, 0x00, 0x00,
792       0x32, 0x00, 0x00, 0x00,
793       0x33, 0x00, 0x00, 0x00,
794       0x00, 0x00, 0x00, 0x00,
795       0x00, 0x4e, 0x00, 0x00,
796       0x8c, 0x4e, 0x00, 0x00,
797       0x09, 0x4e, 0x00, 0x00,
798       0x2e, 0x00, 0x00, 0x00,
799       0x21, 0x00, 0x02, 0x00 };
800    static const int32_t toUTF32LEOffs[]=
801     { 0x00, 0x00, 0x00, 0x00,
802       0x01, 0x01, 0x01, 0x01,
803       0x02, 0x02, 0x02, 0x02,
804       0x03, 0x03, 0x03, 0x03,
805       0x04, 0x04, 0x04, 0x04,
806       0x05, 0x05, 0x05, 0x05,
807       0x06, 0x06, 0x06, 0x06,
808       0x07, 0x07, 0x07, 0x07,
809       0x08, 0x08, 0x08, 0x08,
810       0x08, 0x08, 0x08, 0x08 };
811    static const int32_t fmUTF32LEOffs[] =
812     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
813
814
815
816
817/** Test chars #2 **/
818
819    /* Sahha [health],  slashed h's */
820    static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
821    static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
822
823    /* LMBCS */
824    static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
825    static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
826    static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
827    static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
828    /*********************************** START OF CODE finally *************/
829
830    gInBufferSize = insize;
831    gOutBufferSize = outsize;
832
833    log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
834
835
836    /*UTF-8*/
837    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
838        expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
839
840    log_verbose("Test surrogate behaviour for UTF8\n");
841    {
842        static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
843        static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
844                           0xf0, 0x90, 0x90, 0x81,
845                           0xef, 0xbf, 0xbd
846        };
847        static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
848        testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
849                         expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
850
851
852    }
853
854#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
855    /*ISO-2022*/
856    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
857        expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
858#endif
859
860    /*UTF16 LE*/
861    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
862        expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
863    /*UTF16 BE*/
864    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
865        expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
866    /*UTF32 LE*/
867    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
868        expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
869    /*UTF32 BE*/
870    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
871        expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
872
873    /*LATIN_1*/
874    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
875        expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
876
877#if !UCONFIG_NO_LEGACY_CONVERSION
878    /*EBCDIC_STATEFUL*/
879    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
880        expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
881
882    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
883        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
884
885    /*MBCS*/
886
887    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
888        expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
889    /*DBCS*/
890    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
891        expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
892    /*SBCS*/
893    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
894        expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
895    /*SBCS*/
896    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
897        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
898#endif
899
900
901/****/
902
903    /*UTF-8*/
904    testConvertToU(expectedUTF8, sizeof(expectedUTF8),
905        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
906#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
907    /*ISO-2022*/
908    testConvertToU(expectedISO2022, sizeof(expectedISO2022),
909        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
910#endif
911
912    /*UTF16 LE*/
913    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
914        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
915    /*UTF16 BE*/
916    testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
917        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
918    /*UTF32 LE*/
919    testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
920        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
921    /*UTF32 BE*/
922    testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
923        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
924
925#if !UCONFIG_NO_LEGACY_CONVERSION
926    /*EBCDIC_STATEFUL*/
927    testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
928            sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE);
929    /*MBCS*/
930    testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
931            sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE);
932#endif
933
934    /* Try it again to make sure it still works */
935    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
936        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
937
938#if !UCONFIG_NO_LEGACY_CONVERSION
939    testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
940        malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
941
942    testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
943        expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
944
945    /*LMBCS*/
946    testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
947        expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
948    testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
949        LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
950#endif
951
952    /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
953    {
954        /* encode directly set D and set O */
955        static const uint8_t utf7[] = {
956            /*
957                Hi Mom -+Jjo--!
958                A+ImIDkQ.
959                +-
960                +ZeVnLIqe-
961            */
962            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
963            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
964            0x2b, 0x2d,
965            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
966        };
967        static const UChar unicode[] = {
968            /*
969                Hi Mom -<WHITE SMILING FACE>-!
970                A<NOT IDENTICAL TO><ALPHA>.
971                +
972                [Japanese word "nihongo"]
973            */
974            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
975            0x41, 0x2262, 0x0391, 0x2e,
976            0x2b,
977            0x65e5, 0x672c, 0x8a9e
978        };
979        static const int32_t toUnicodeOffsets[] = {
980            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
981            15, 17, 19, 23,
982            24,
983            27, 29, 32
984        };
985        static const int32_t fromUnicodeOffsets[] = {
986            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
987            11, 12, 12, 12, 13, 13, 13, 13, 14,
988            15, 15,
989            16, 16, 16, 17, 17, 17, 18, 18, 18, 18
990        };
991
992        /* same but escaping set O (the exclamation mark) */
993        static const uint8_t utf7Restricted[] = {
994            /*
995                Hi Mom -+Jjo--+ACE-
996                A+ImIDkQ.
997                +-
998                +ZeVnLIqe-
999            */
1000            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1001            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1002            0x2b, 0x2d,
1003            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1004        };
1005        static const int32_t toUnicodeOffsetsR[] = {
1006            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1007            19, 21, 23, 27,
1008            28,
1009            31, 33, 36
1010        };
1011        static const int32_t fromUnicodeOffsetsR[] = {
1012            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1013            11, 12, 12, 12, 13, 13, 13, 13, 14,
1014            15, 15,
1015            16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1016        };
1017
1018        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1019
1020        testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
1021
1022        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1023
1024        testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1025    }
1026
1027    /*
1028     * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1029     * modified according to RFC 2060,
1030     * and supplemented with the one example in RFC 2060 itself.
1031     */
1032    {
1033        static const uint8_t imap[] = {
1034            /*  Hi Mom -&Jjo--!
1035                A&ImIDkQ-.
1036                &-
1037                &ZeVnLIqe-
1038                \
1039                ~peter
1040                /mail
1041                /&ZeVnLIqe-
1042                /&U,BTFw-
1043            */
1044            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1045            0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1046            0x26, 0x2d,
1047            0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1048            0x5c,
1049            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1050            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1051            0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1052            0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1053        };
1054        static const UChar unicode[] = {
1055            /*  Hi Mom -<WHITE SMILING FACE>-!
1056                A<NOT IDENTICAL TO><ALPHA>.
1057                &
1058                [Japanese word "nihongo"]
1059                \
1060                ~peter
1061                /mail
1062                /<65e5, 672c, 8a9e>
1063                /<53f0, 5317>
1064            */
1065            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1066            0x41, 0x2262, 0x0391, 0x2e,
1067            0x26,
1068            0x65e5, 0x672c, 0x8a9e,
1069            0x5c,
1070            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1071            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1072            0x2f, 0x65e5, 0x672c, 0x8a9e,
1073            0x2f, 0x53f0, 0x5317
1074        };
1075        static const int32_t toUnicodeOffsets[] = {
1076            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1077            15, 17, 19, 24,
1078            25,
1079            28, 30, 33,
1080            37,
1081            38, 39, 40, 41, 42, 43,
1082            44, 45, 46, 47, 48,
1083            49, 51, 53, 56,
1084            60, 62, 64
1085        };
1086        static const int32_t fromUnicodeOffsets[] = {
1087            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1088            11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1089            15, 15,
1090            16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1091            19,
1092            20, 21, 22, 23, 24, 25,
1093            26, 27, 28, 29, 30,
1094            31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1095            35, 36, 36, 36, 37, 37, 37, 37, 37
1096        };
1097
1098        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1099
1100        testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1101    }
1102
1103    /* Test UTF-8 bad data handling*/
1104    {
1105        static const uint8_t utf8[]={
1106            0x61,
1107            0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1108            0x00,
1109            0x62,
1110            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1111            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1112            0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1113            0xdf, 0xbf,                     /* 7ff */
1114            0xbf,                           /* truncated tail */
1115            0xf4, 0x90, 0x80, 0x80,         /* 11FFFF */
1116            0x02
1117        };
1118
1119        static const uint16_t utf8Expected[]={
1120            0x0061,
1121            0xfffd,
1122            0x0000,
1123            0x0062,
1124            0xfffd,
1125            0xfffd,
1126            0xdbff, 0xdfff,
1127            0x07ff,
1128            0xfffd,
1129            0xfffd,
1130            0x0002
1131        };
1132
1133        static const int32_t utf8Offsets[]={
1134            0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1135        };
1136        testConvertToU(utf8, sizeof(utf8),
1137                       utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1138
1139    }
1140
1141    /* Test UTF-32BE bad data handling*/
1142    {
1143        static const uint8_t utf32[]={
1144            0x00, 0x00, 0x00, 0x61,
1145            0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1146            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1147            0x00, 0x00, 0x00, 0x62,
1148            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1149            0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1150            0x00, 0x00, 0x01, 0x62,
1151            0x00, 0x00, 0x02, 0x62
1152        };
1153        static const uint16_t utf32Expected[]={
1154            0x0061,
1155            0xfffd,         /* 0x110000 out of range */
1156            0xDBFF,         /* 0x10FFFF in range */
1157            0xDFFF,
1158            0x0062,
1159            0xfffd,         /* 0xffffffff out of range */
1160            0xfffd,         /* 0x7fffffff out of range */
1161            0x0162,
1162            0x0262
1163        };
1164        static const int32_t utf32Offsets[]={
1165            0, 4, 8, 8, 12, 16, 20, 24, 28
1166        };
1167        static const uint8_t utf32ExpectedBack[]={
1168            0x00, 0x00, 0x00, 0x61,
1169            0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1170            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1171            0x00, 0x00, 0x00, 0x62,
1172            0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1173            0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1174            0x00, 0x00, 0x01, 0x62,
1175            0x00, 0x00, 0x02, 0x62
1176        };
1177        static const int32_t utf32OffsetsBack[]={
1178            0,0,0,0,
1179            1,1,1,1,
1180            2,2,2,2,
1181            4,4,4,4,
1182            5,5,5,5,
1183            6,6,6,6,
1184            7,7,7,7,
1185            8,8,8,8
1186        };
1187
1188        testConvertToU(utf32, sizeof(utf32),
1189                       utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1190        testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1191            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1192    }
1193
1194    /* Test UTF-32LE bad data handling*/
1195    {
1196        static const uint8_t utf32[]={
1197            0x61, 0x00, 0x00, 0x00,
1198            0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1199            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1200            0x62, 0x00, 0x00, 0x00,
1201            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1202            0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1203            0x62, 0x01, 0x00, 0x00,
1204            0x62, 0x02, 0x00, 0x00,
1205        };
1206
1207        static const uint16_t utf32Expected[]={
1208            0x0061,
1209            0xfffd,         /* 0x110000 out of range */
1210            0xDBFF,         /* 0x10FFFF in range */
1211            0xDFFF,
1212            0x0062,
1213            0xfffd,         /* 0xffffffff out of range */
1214            0xfffd,         /* 0x7fffffff out of range */
1215            0x0162,
1216            0x0262
1217        };
1218        static const int32_t utf32Offsets[]={
1219            0, 4, 8, 8, 12, 16, 20, 24, 28
1220        };
1221        static const uint8_t utf32ExpectedBack[]={
1222            0x61, 0x00, 0x00, 0x00,
1223            0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1224            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1225            0x62, 0x00, 0x00, 0x00,
1226            0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1227            0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1228            0x62, 0x01, 0x00, 0x00,
1229            0x62, 0x02, 0x00, 0x00
1230        };
1231        static const int32_t utf32OffsetsBack[]={
1232            0,0,0,0,
1233            1,1,1,1,
1234            2,2,2,2,
1235            4,4,4,4,
1236            5,5,5,5,
1237            6,6,6,6,
1238            7,7,7,7,
1239            8,8,8,8
1240        };
1241        testConvertToU(utf32, sizeof(utf32),
1242            utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1243        testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1244            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1245    }
1246}
1247
1248static void TestCoverageMBCS(){
1249#if 0
1250    UErrorCode status = U_ZERO_ERROR;
1251    const char *directory = loadTestData(&status);
1252    char* tdpath = NULL;
1253    char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1254    int len = strlen(directory);
1255    char* index=NULL;
1256
1257    tdpath = (char*) malloc(sizeof(char) * (len * 2));
1258    uprv_strcpy(saveDirectory,u_getDataDirectory());
1259    log_verbose("Retrieved data directory %s \n",saveDirectory);
1260    uprv_strcpy(tdpath,directory);
1261    index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1262
1263    if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1264            *(index+1)=0;
1265    }
1266    u_setDataDirectory(tdpath);
1267    log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1268#endif
1269
1270    /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1271      which is test file for MBCS conversion with single-byte codepage data.*/
1272    {
1273
1274        /* MBCS with single byte codepage data test1.ucm*/
1275        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1276        const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1277        int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1278
1279        /*from Unicode*/
1280        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1281            expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1282    }
1283
1284    /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1285      which is test file for MBCS conversion with three-byte codepage data.*/
1286    {
1287
1288        /* MBCS with three byte codepage data test3.ucm*/
1289        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1290        const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1291        int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1292
1293        const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1294        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1295        int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1296
1297        /*from Unicode*/
1298        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1299            expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1300
1301        /*to Unicode*/
1302        testConvertToU(test3input, sizeof(test3input),
1303            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1304
1305    }
1306
1307    /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1308      which is test file for MBCS conversion with four-byte codepage data.*/
1309    {
1310
1311        /* MBCS with three byte codepage data test4.ucm*/
1312        static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1313        static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1314        static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1315
1316        static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1317        static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1318        static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1319
1320        /*from Unicode*/
1321        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1322            expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1323
1324        /*to Unicode*/
1325        testConvertToU(test4input, sizeof(test4input),
1326            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1327
1328    }
1329#if 0
1330    free(tdpath);
1331    /* restore the original data directory */
1332    log_verbose("Setting the data directory to %s \n", saveDirectory);
1333    u_setDataDirectory(saveDirectory);
1334    free(saveDirectory);
1335#endif
1336
1337}
1338
1339static void TestConverterType(const char *convName, UConverterType convType) {
1340    UConverter* myConverter;
1341    UErrorCode err = U_ZERO_ERROR;
1342
1343    myConverter = my_ucnv_open(convName, &err);
1344
1345    if (U_FAILURE(err)) {
1346        log_data_err("Failed to create an %s converter\n", convName);
1347        return;
1348    }
1349    else
1350    {
1351        if (ucnv_getType(myConverter)!=convType) {
1352            log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1353                convName, convType);
1354        }
1355        else {
1356            log_verbose("ucnv_getType %s ok\n", convName);
1357        }
1358    }
1359    ucnv_close(myConverter);
1360}
1361
1362static void TestConverterTypesAndStarters()
1363{
1364#if !UCONFIG_NO_LEGACY_CONVERSION
1365    UConverter* myConverter;
1366    UErrorCode err = U_ZERO_ERROR;
1367    UBool mystarters[256];
1368
1369/*    const UBool expectedKSCstarters[256] = {
1370        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1371        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1372        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1373        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1374        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1375        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1376        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1377        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1378        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1379        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1380        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1381        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384        FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1385        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1386        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1387        TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1388        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1389        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1390        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1391        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1392        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1393        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1394        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1395        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1396
1397
1398    log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1399
1400    myConverter = ucnv_open("ksc", &err);
1401    if (U_FAILURE(err)) {
1402      log_data_err("Failed to create an ibm-ksc converter\n");
1403      return;
1404    }
1405    else
1406    {
1407        if (ucnv_getType(myConverter)!=UCNV_MBCS)
1408            log_err("ucnv_getType Failed for ibm-949\n");
1409        else
1410            log_verbose("ucnv_getType ibm-949 ok\n");
1411
1412        if(myConverter!=NULL)
1413            ucnv_getStarters(myConverter, mystarters, &err);
1414
1415        /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1416          log_err("Failed ucnv_getStarters for ksc\n");
1417          else
1418          log_verbose("ucnv_getStarters ok\n");*/
1419
1420    }
1421    ucnv_close(myConverter);
1422
1423    TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1424    TestConverterType("ibm-878", UCNV_SBCS);
1425#endif
1426
1427    TestConverterType("iso-8859-1", UCNV_LATIN_1);
1428
1429    TestConverterType("ibm-1208", UCNV_UTF8);
1430
1431    TestConverterType("utf-8", UCNV_UTF8);
1432    TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1433    TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1434    TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1435    TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1436
1437#if !UCONFIG_NO_LEGACY_CONVERSION
1438
1439#if defined(U_ENABLE_GENERIC_ISO_2022)
1440    TestConverterType("iso-2022", UCNV_ISO_2022);
1441#endif
1442
1443    TestConverterType("hz", UCNV_HZ);
1444#endif
1445
1446    TestConverterType("scsu", UCNV_SCSU);
1447
1448#if !UCONFIG_NO_LEGACY_CONVERSION
1449    TestConverterType("x-iscii-de", UCNV_ISCII);
1450#endif
1451
1452    TestConverterType("ascii", UCNV_US_ASCII);
1453    TestConverterType("utf-7", UCNV_UTF7);
1454    TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1455    TestConverterType("bocu-1", UCNV_BOCU1);
1456}
1457
1458static void
1459TestAmbiguousConverter(UConverter *cnv) {
1460    static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1461    UChar outUnicode[20]={ 0, 0, 0, 0 };
1462
1463    const char *s;
1464    UChar *u;
1465    UErrorCode errorCode;
1466    UBool isAmbiguous;
1467
1468    /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1469    errorCode=U_ZERO_ERROR;
1470    s=inBytes;
1471    u=outUnicode;
1472    ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1473    if(U_FAILURE(errorCode)) {
1474        /* we do not care about general failures in this test; the input may just not be mappable */
1475        return;
1476    }
1477
1478    if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1479        /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1480        /* There are some encodings that are partially ASCII based,
1481        like the ISO-7 and GSM series of codepages, which we ignore. */
1482        return;
1483    }
1484
1485    isAmbiguous=ucnv_isAmbiguous(cnv);
1486
1487    /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1488    if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1489        log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1490            ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1491        return;
1492    }
1493
1494    if(outUnicode[2]!=0x5c) {
1495        /* needs fixup, fix it */
1496        ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1497        if(outUnicode[2]!=0x5c) {
1498            /* the fix failed */
1499            log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1500            return;
1501        }
1502    }
1503}
1504
1505static void TestAmbiguous()
1506{
1507    UErrorCode status = U_ZERO_ERROR;
1508    UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1509    static const char target[] = {
1510        /* "\\usr\\local\\share\\data\\icutest.txt" */
1511        0x5c, 0x75, 0x73, 0x72,
1512        0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1513        0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1514        0x5c, 0x64, 0x61, 0x74, 0x61,
1515        0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1516        0
1517    };
1518    UChar asciiResult[200], sjisResult[200];
1519    int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1520    const char *name;
1521
1522    /* enumerate all converters */
1523    status=U_ZERO_ERROR;
1524    for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1525        cnv=ucnv_open(name, &status);
1526        if(U_SUCCESS(status)) {
1527            /* BEGIN android-changed. To save space Android does not build full ISO-2022-CN CJK tables. */
1528            const char* cnvName = ucnv_getName(cnv, &status);
1529            if (strlen(cnvName) < 8 ||
1530                strncmp(cnvName, "ISO_2022_CN", 8) != 0) {
1531            TestAmbiguousConverter(cnv);
1532            }
1533            /* END android-changed */
1534            ucnv_close(cnv);
1535        } else {
1536            log_err("error: unable to open available converter \"%s\"\n", name);
1537            status=U_ZERO_ERROR;
1538        }
1539    }
1540
1541#if !UCONFIG_NO_LEGACY_CONVERSION
1542    sjis_cnv = ucnv_open("ibm-943", &status);
1543    if (U_FAILURE(status))
1544    {
1545        log_data_err("Failed to create a SJIS converter\n");
1546        return;
1547    }
1548    ascii_cnv = ucnv_open("LATIN-1", &status);
1549    if (U_FAILURE(status))
1550    {
1551        log_data_err("Failed to create a LATIN-1 converter\n");
1552        ucnv_close(sjis_cnv);
1553        return;
1554    }
1555    /* convert target from SJIS to Unicode */
1556    sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1557    if (U_FAILURE(status))
1558    {
1559        log_err("Failed to convert the SJIS string.\n");
1560        ucnv_close(sjis_cnv);
1561        ucnv_close(ascii_cnv);
1562        return;
1563    }
1564    /* convert target from Latin-1 to Unicode */
1565    /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1566    if (U_FAILURE(status))
1567    {
1568        log_err("Failed to convert the Latin-1 string.\n");
1569        ucnv_close(sjis_cnv);
1570        ucnv_close(ascii_cnv);
1571        return;
1572    }
1573    if (!ucnv_isAmbiguous(sjis_cnv))
1574    {
1575        log_err("SJIS converter should contain ambiguous character mappings.\n");
1576        ucnv_close(sjis_cnv);
1577        ucnv_close(ascii_cnv);
1578        return;
1579    }
1580    if (u_strcmp(sjisResult, asciiResult) == 0)
1581    {
1582        log_err("File separators for SJIS don't need to be fixed.\n");
1583    }
1584    ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1585    if (u_strcmp(sjisResult, asciiResult) != 0)
1586    {
1587        log_err("Fixing file separator for SJIS failed.\n");
1588    }
1589    ucnv_close(sjis_cnv);
1590    ucnv_close(ascii_cnv);
1591#endif
1592}
1593
1594static void
1595TestSignatureDetection(){
1596    /* with null terminated strings */
1597    {
1598        static const char* data[] = {
1599                "\xFE\xFF\x00\x00",     /* UTF-16BE */
1600                "\xFF\xFE\x00\x00",     /* UTF-16LE */
1601                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1602                "\x0E\xFE\xFF\x00",     /* SCSU     */
1603
1604                "\xFE\xFF",             /* UTF-16BE */
1605                "\xFF\xFE",             /* UTF-16LE */
1606                "\xEF\xBB\xBF",         /* UTF-8    */
1607                "\x0E\xFE\xFF",         /* SCSU     */
1608
1609                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1610                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1611                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1612                "\x0E\xFE\xFF\x41",     /* SCSU     */
1613
1614                "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1615                "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1616                "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1617                "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1618                "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1619
1620                "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1621        };
1622        static const char* expected[] = {
1623                "UTF-16BE",
1624                "UTF-16LE",
1625                "UTF-8",
1626                "SCSU",
1627
1628                "UTF-16BE",
1629                "UTF-16LE",
1630                "UTF-8",
1631                "SCSU",
1632
1633                "UTF-16BE",
1634                "UTF-16LE",
1635                "UTF-8",
1636                "SCSU",
1637
1638                "UTF-7",
1639                "UTF-7",
1640                "UTF-7",
1641                "UTF-7",
1642                "UTF-7",
1643                "UTF-EBCDIC"
1644        };
1645        static const int32_t expectedLength[] ={
1646            2,
1647            2,
1648            3,
1649            3,
1650
1651            2,
1652            2,
1653            3,
1654            3,
1655
1656            2,
1657            2,
1658            3,
1659            3,
1660
1661            5,
1662            4,
1663            4,
1664            4,
1665            4,
1666            4
1667        };
1668        int i=0;
1669        UErrorCode err;
1670        int32_t signatureLength = -1;
1671        const char* source = NULL;
1672        const char* enc = NULL;
1673        for( ; i<sizeof(data)/sizeof(char*); i++){
1674            err = U_ZERO_ERROR;
1675            source = data[i];
1676            enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1677            if(U_FAILURE(err)){
1678                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1679                continue;
1680            }
1681            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1682                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1683                continue;
1684            }
1685            if(signatureLength != expectedLength[i]){
1686                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1687            }
1688        }
1689    }
1690    {
1691        static const char* data[] = {
1692                "\xFE\xFF\x00",         /* UTF-16BE */
1693                "\xFF\xFE\x00",         /* UTF-16LE */
1694                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1695                "\x0E\xFE\xFF\x00",     /* SCSU     */
1696                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1697                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1698                "\xFE\xFF",             /* UTF-16BE */
1699                "\xFF\xFE",             /* UTF-16LE */
1700                "\xEF\xBB\xBF",         /* UTF-8    */
1701                "\x0E\xFE\xFF",         /* SCSU     */
1702                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1703                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1704                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1705                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1706                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1707                "\x0E\xFE\xFF\x41",     /* SCSU     */
1708                "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1709                "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1710                "\xFB\xEE\x28",         /* BOCU-1   */
1711                "\xFF\x41\x42"          /* NULL     */
1712        };
1713        static const int len[] = {
1714            3,
1715            3,
1716            4,
1717            4,
1718            4,
1719            4,
1720            2,
1721            2,
1722            3,
1723            3,
1724            4,
1725            4,
1726            4,
1727            4,
1728            4,
1729            4,
1730            5,
1731            5,
1732            3,
1733            3
1734        };
1735
1736        static const char* expected[] = {
1737                "UTF-16BE",
1738                "UTF-16LE",
1739                "UTF-8",
1740                "SCSU",
1741                "UTF-32BE",
1742                "UTF-32LE",
1743                "UTF-16BE",
1744                "UTF-16LE",
1745                "UTF-8",
1746                "SCSU",
1747                "UTF-32BE",
1748                "UTF-32LE",
1749                "UTF-16BE",
1750                "UTF-16LE",
1751                "UTF-8",
1752                "SCSU",
1753                "UTF-32BE",
1754                "UTF-32LE",
1755                "BOCU-1",
1756                NULL
1757        };
1758        static const int32_t expectedLength[] ={
1759            2,
1760            2,
1761            3,
1762            3,
1763            4,
1764            4,
1765            2,
1766            2,
1767            3,
1768            3,
1769            4,
1770            4,
1771            2,
1772            2,
1773            3,
1774            3,
1775            4,
1776            4,
1777            3,
1778            0
1779        };
1780        int i=0;
1781        UErrorCode err;
1782        int32_t signatureLength = -1;
1783        int32_t sourceLength=-1;
1784        const char* source = NULL;
1785        const char* enc = NULL;
1786        for( ; i<sizeof(data)/sizeof(char*); i++){
1787            err = U_ZERO_ERROR;
1788            source = data[i];
1789            sourceLength = len[i];
1790            enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1791            if(U_FAILURE(err)){
1792                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1793                continue;
1794            }
1795            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1796                if(expected[i] !=NULL){
1797                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1798                 continue;
1799                }
1800            }
1801            if(signatureLength != expectedLength[i]){
1802                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1803            }
1804        }
1805    }
1806}
1807
1808static void TestUTF7() {
1809    /* test input */
1810    static const uint8_t in[]={
1811        /* H - +Jjo- - ! +- +2AHcAQ */
1812        0x48,
1813        0x2d,
1814        0x2b, 0x4a, 0x6a, 0x6f,
1815        0x2d, 0x2d,
1816        0x21,
1817        0x2b, 0x2d,
1818        0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1819    };
1820
1821    /* expected test results */
1822    static const int32_t results[]={
1823        /* number of bytes read, code point */
1824        1, 0x48,
1825        1, 0x2d,
1826        4, 0x263a, /* <WHITE SMILING FACE> */
1827        2, 0x2d,
1828        1, 0x21,
1829        2, 0x2b,
1830        7, 0x10401
1831    };
1832
1833    const char *cnvName;
1834    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1835    UErrorCode errorCode=U_ZERO_ERROR;
1836    UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1837    if(U_FAILURE(errorCode)) {
1838        log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1839        return;
1840    }
1841    TestNextUChar(cnv, source, limit, results, "UTF-7");
1842    /* Test the condition when source >= sourceLimit */
1843    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1844    cnvName = ucnv_getName(cnv, &errorCode);
1845    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1846        log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1847    }
1848    ucnv_close(cnv);
1849}
1850
1851static void TestIMAP() {
1852    /* test input */
1853    static const uint8_t in[]={
1854        /* H - &Jjo- - ! &- &2AHcAQ- \ */
1855        0x48,
1856        0x2d,
1857        0x26, 0x4a, 0x6a, 0x6f,
1858        0x2d, 0x2d,
1859        0x21,
1860        0x26, 0x2d,
1861        0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1862    };
1863
1864    /* expected test results */
1865    static const int32_t results[]={
1866        /* number of bytes read, code point */
1867        1, 0x48,
1868        1, 0x2d,
1869        4, 0x263a, /* <WHITE SMILING FACE> */
1870        2, 0x2d,
1871        1, 0x21,
1872        2, 0x26,
1873        7, 0x10401
1874    };
1875
1876    const char *cnvName;
1877    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1878    UErrorCode errorCode=U_ZERO_ERROR;
1879    UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1880    if(U_FAILURE(errorCode)) {
1881        log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1882        return;
1883    }
1884    TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1885    /* Test the condition when source >= sourceLimit */
1886    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1887    cnvName = ucnv_getName(cnv, &errorCode);
1888    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1889        log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1890    }
1891    ucnv_close(cnv);
1892}
1893
1894static void TestUTF8() {
1895    /* test input */
1896    static const uint8_t in[]={
1897        0x61,
1898        0xc2, 0x80,
1899        0xe0, 0xa0, 0x80,
1900        0xf0, 0x90, 0x80, 0x80,
1901        0xf4, 0x84, 0x8c, 0xa1,
1902        0xf0, 0x90, 0x90, 0x81
1903    };
1904
1905    /* expected test results */
1906    static const int32_t results[]={
1907        /* number of bytes read, code point */
1908        1, 0x61,
1909        2, 0x80,
1910        3, 0x800,
1911        4, 0x10000,
1912        4, 0x104321,
1913        4, 0x10401
1914    };
1915
1916    /* error test input */
1917    static const uint8_t in2[]={
1918        0x61,
1919        0xc0, 0x80,                     /* illegal non-shortest form */
1920        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1921        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1922        0xc0, 0xc0,                     /* illegal trail byte */
1923        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1924        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1925        0xfe,                           /* illegal byte altogether */
1926        0x62
1927    };
1928
1929    /* expected error test results */
1930    static const int32_t results2[]={
1931        /* number of bytes read, code point */
1932        1, 0x61,
1933        22, 0x62
1934    };
1935
1936    UConverterToUCallback cb;
1937    const void *p;
1938
1939    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1940    UErrorCode errorCode=U_ZERO_ERROR;
1941    UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1942    if(U_FAILURE(errorCode)) {
1943        log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1944        return;
1945    }
1946    TestNextUChar(cnv, source, limit, results, "UTF-8");
1947    /* Test the condition when source >= sourceLimit */
1948    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1949
1950    /* test error behavior with a skip callback */
1951    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1952    source=(const char *)in2;
1953    limit=(const char *)(in2+sizeof(in2));
1954    TestNextUChar(cnv, source, limit, results2, "UTF-8");
1955
1956    ucnv_close(cnv);
1957}
1958
1959static void TestCESU8() {
1960    /* test input */
1961    static const uint8_t in[]={
1962        0x61,
1963        0xc2, 0x80,
1964        0xe0, 0xa0, 0x80,
1965        0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1966        0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1967        0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1968        0xef, 0xbf, 0xbc
1969    };
1970
1971    /* expected test results */
1972    static const int32_t results[]={
1973        /* number of bytes read, code point */
1974        1, 0x61,
1975        2, 0x80,
1976        3, 0x800,
1977        6, 0x10000,
1978        3, 0xdc01,
1979        -1,0xd802,  /* may read 3 or 6 bytes */
1980        -1,0x10ffff,/* may read 0 or 3 bytes */
1981        3, 0xfffc
1982    };
1983
1984    /* error test input */
1985    static const uint8_t in2[]={
1986        0x61,
1987        0xc0, 0x80,                     /* illegal non-shortest form */
1988        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1989        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1990        0xc0, 0xc0,                     /* illegal trail byte */
1991        0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
1992        0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
1993        0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
1994        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1995        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1996        0xfe,                           /* illegal byte altogether */
1997        0x62
1998    };
1999
2000    /* expected error test results */
2001    static const int32_t results2[]={
2002        /* number of bytes read, code point */
2003        1, 0x61,
2004        34, 0x62
2005    };
2006
2007    UConverterToUCallback cb;
2008    const void *p;
2009
2010    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2011    UErrorCode errorCode=U_ZERO_ERROR;
2012    UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2013    if(U_FAILURE(errorCode)) {
2014        log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2015        return;
2016    }
2017    TestNextUChar(cnv, source, limit, results, "CESU-8");
2018    /* Test the condition when source >= sourceLimit */
2019    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2020
2021    /* test error behavior with a skip callback */
2022    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2023    source=(const char *)in2;
2024    limit=(const char *)(in2+sizeof(in2));
2025    TestNextUChar(cnv, source, limit, results2, "CESU-8");
2026
2027    ucnv_close(cnv);
2028}
2029
2030static void TestUTF16() {
2031    /* test input */
2032    static const uint8_t in1[]={
2033        0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2034    };
2035    static const uint8_t in2[]={
2036        0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2037    };
2038    static const uint8_t in3[]={
2039        0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2040    };
2041
2042    /* expected test results */
2043    static const int32_t results1[]={
2044        /* number of bytes read, code point */
2045        4, 0x4e00,
2046        2, 0xfeff
2047    };
2048    static const int32_t results2[]={
2049        /* number of bytes read, code point */
2050        4, 0x004e,
2051        2, 0xfffe
2052    };
2053    static const int32_t results3[]={
2054        /* number of bytes read, code point */
2055        2, 0xfefe,
2056        2, 0x4e00,
2057        2, 0xfeff,
2058        4, 0x20001
2059    };
2060
2061    const char *source, *limit;
2062
2063    UErrorCode errorCode=U_ZERO_ERROR;
2064    UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2065    if(U_FAILURE(errorCode)) {
2066        log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2067        return;
2068    }
2069
2070    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2071    TestNextUChar(cnv, source, limit, results1, "UTF-16");
2072
2073    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2074    ucnv_resetToUnicode(cnv);
2075    TestNextUChar(cnv, source, limit, results2, "UTF-16");
2076
2077    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2078    ucnv_resetToUnicode(cnv);
2079    TestNextUChar(cnv, source, limit, results3, "UTF-16");
2080
2081    /* Test the condition when source >= sourceLimit */
2082    ucnv_resetToUnicode(cnv);
2083    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2084
2085    ucnv_close(cnv);
2086}
2087
2088static void TestUTF16BE() {
2089    /* test input */
2090    static const uint8_t in[]={
2091        0x00, 0x61,
2092        0x00, 0xc0,
2093        0x00, 0x31,
2094        0x00, 0xf4,
2095        0xce, 0xfe,
2096        0xd8, 0x01, 0xdc, 0x01
2097    };
2098
2099    /* expected test results */
2100    static const int32_t results[]={
2101        /* number of bytes read, code point */
2102        2, 0x61,
2103        2, 0xc0,
2104        2, 0x31,
2105        2, 0xf4,
2106        2, 0xcefe,
2107        4, 0x10401
2108    };
2109
2110    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2111    UErrorCode errorCode=U_ZERO_ERROR;
2112    UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2113    if(U_FAILURE(errorCode)) {
2114        log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2115        return;
2116    }
2117    TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2118    /* Test the condition when source >= sourceLimit */
2119    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2120    /*Test for the condition where there is an invalid character*/
2121    {
2122        static const uint8_t source2[]={0x61};
2123        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2124        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2125    }
2126#if 0
2127    /*
2128     * Test disabled because currently the UTF-16BE/LE converters are supposed
2129     * to not set errors for unpaired surrogates.
2130     * This may change with
2131     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2132     */
2133
2134    /*Test for the condition where there is a surrogate pair*/
2135    {
2136        const uint8_t source2[]={0xd8, 0x01};
2137        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2138    }
2139#endif
2140    ucnv_close(cnv);
2141}
2142
2143static void
2144TestUTF16LE() {
2145    /* test input */
2146    static const uint8_t in[]={
2147        0x61, 0x00,
2148        0x31, 0x00,
2149        0x4e, 0x2e,
2150        0x4e, 0x00,
2151        0x01, 0xd8, 0x01, 0xdc
2152    };
2153
2154    /* expected test results */
2155    static const int32_t results[]={
2156        /* number of bytes read, code point */
2157        2, 0x61,
2158        2, 0x31,
2159        2, 0x2e4e,
2160        2, 0x4e,
2161        4, 0x10401
2162    };
2163
2164    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2165    UErrorCode errorCode=U_ZERO_ERROR;
2166    UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2167    if(U_FAILURE(errorCode)) {
2168        log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2169        return;
2170    }
2171    TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2172    /* Test the condition when source >= sourceLimit */
2173    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2174    /*Test for the condition where there is an invalid character*/
2175    {
2176        static const uint8_t source2[]={0x61};
2177        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2178        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2179    }
2180#if 0
2181    /*
2182     * Test disabled because currently the UTF-16BE/LE converters are supposed
2183     * to not set errors for unpaired surrogates.
2184     * This may change with
2185     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2186     */
2187
2188    /*Test for the condition where there is a surrogate character*/
2189    {
2190        static const uint8_t source2[]={0x01, 0xd8};
2191        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2192    }
2193#endif
2194
2195    ucnv_close(cnv);
2196}
2197
2198static void TestUTF32() {
2199    /* test input */
2200    static const uint8_t in1[]={
2201        0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2202    };
2203    static const uint8_t in2[]={
2204        0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2205    };
2206    static const uint8_t in3[]={
2207        0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2208    };
2209
2210    /* expected test results */
2211    static const int32_t results1[]={
2212        /* number of bytes read, code point */
2213        8, 0x100f00,
2214        4, 0xfeff
2215    };
2216    static const int32_t results2[]={
2217        /* number of bytes read, code point */
2218        8, 0x0f1000,
2219        4, 0xfffe
2220    };
2221    static const int32_t results3[]={
2222        /* number of bytes read, code point */
2223        4, 0xfefe,
2224        4, 0x100f00,
2225        4, 0xfffd, /* unmatched surrogate */
2226        4, 0xfffd  /* unmatched surrogate */
2227    };
2228
2229    const char *source, *limit;
2230
2231    UErrorCode errorCode=U_ZERO_ERROR;
2232    UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2233    if(U_FAILURE(errorCode)) {
2234        log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2235        return;
2236    }
2237
2238    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2239    TestNextUChar(cnv, source, limit, results1, "UTF-32");
2240
2241    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2242    ucnv_resetToUnicode(cnv);
2243    TestNextUChar(cnv, source, limit, results2, "UTF-32");
2244
2245    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2246    ucnv_resetToUnicode(cnv);
2247    TestNextUChar(cnv, source, limit, results3, "UTF-32");
2248
2249    /* Test the condition when source >= sourceLimit */
2250    ucnv_resetToUnicode(cnv);
2251    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2252
2253    ucnv_close(cnv);
2254}
2255
2256static void
2257TestUTF32BE() {
2258    /* test input */
2259    static const uint8_t in[]={
2260        0x00, 0x00, 0x00, 0x61,
2261        0x00, 0x00, 0x30, 0x61,
2262        0x00, 0x00, 0xdc, 0x00,
2263        0x00, 0x00, 0xd8, 0x00,
2264        0x00, 0x00, 0xdf, 0xff,
2265        0x00, 0x00, 0xff, 0xfe,
2266        0x00, 0x10, 0xab, 0xcd,
2267        0x00, 0x10, 0xff, 0xff
2268    };
2269
2270    /* expected test results */
2271    static const int32_t results[]={
2272        /* number of bytes read, code point */
2273        4, 0x61,
2274        4, 0x3061,
2275        4, 0xfffd,
2276        4, 0xfffd,
2277        4, 0xfffd,
2278        4, 0xfffe,
2279        4, 0x10abcd,
2280        4, 0x10ffff
2281    };
2282
2283    /* error test input */
2284    static const uint8_t in2[]={
2285        0x00, 0x00, 0x00, 0x61,
2286        0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2287        0x00, 0x00, 0x00, 0x62,
2288        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2289        0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2290        0x00, 0x00, 0x01, 0x62,
2291        0x00, 0x00, 0x02, 0x62
2292    };
2293
2294    /* expected error test results */
2295    static const int32_t results2[]={
2296        /* number of bytes read, code point */
2297        4,  0x61,
2298        8,  0x62,
2299        12, 0x162,
2300        4,  0x262
2301    };
2302
2303    UConverterToUCallback cb;
2304    const void *p;
2305
2306    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2307    UErrorCode errorCode=U_ZERO_ERROR;
2308    UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2309    if(U_FAILURE(errorCode)) {
2310        log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2311        return;
2312    }
2313    TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2314
2315    /* Test the condition when source >= sourceLimit */
2316    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2317
2318    /* test error behavior with a skip callback */
2319    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2320    source=(const char *)in2;
2321    limit=(const char *)(in2+sizeof(in2));
2322    TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2323
2324    ucnv_close(cnv);
2325}
2326
2327static void
2328TestUTF32LE() {
2329    /* test input */
2330    static const uint8_t in[]={
2331        0x61, 0x00, 0x00, 0x00,
2332        0x61, 0x30, 0x00, 0x00,
2333        0x00, 0xdc, 0x00, 0x00,
2334        0x00, 0xd8, 0x00, 0x00,
2335        0xff, 0xdf, 0x00, 0x00,
2336        0xfe, 0xff, 0x00, 0x00,
2337        0xcd, 0xab, 0x10, 0x00,
2338        0xff, 0xff, 0x10, 0x00
2339    };
2340
2341    /* expected test results */
2342    static const int32_t results[]={
2343        /* number of bytes read, code point */
2344        4, 0x61,
2345        4, 0x3061,
2346        4, 0xfffd,
2347        4, 0xfffd,
2348        4, 0xfffd,
2349        4, 0xfffe,
2350        4, 0x10abcd,
2351        4, 0x10ffff
2352    };
2353
2354    /* error test input */
2355    static const uint8_t in2[]={
2356        0x61, 0x00, 0x00, 0x00,
2357        0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2358        0x62, 0x00, 0x00, 0x00,
2359        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2360        0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2361        0x62, 0x01, 0x00, 0x00,
2362        0x62, 0x02, 0x00, 0x00,
2363    };
2364
2365    /* expected error test results */
2366    static const int32_t results2[]={
2367        /* number of bytes read, code point */
2368        4,  0x61,
2369        8,  0x62,
2370        12, 0x162,
2371        4,  0x262,
2372    };
2373
2374    UConverterToUCallback cb;
2375    const void *p;
2376
2377    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2378    UErrorCode errorCode=U_ZERO_ERROR;
2379    UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2380    if(U_FAILURE(errorCode)) {
2381        log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2382        return;
2383    }
2384    TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2385
2386    /* Test the condition when source >= sourceLimit */
2387    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2388
2389    /* test error behavior with a skip callback */
2390    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2391    source=(const char *)in2;
2392    limit=(const char *)(in2+sizeof(in2));
2393    TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2394
2395    ucnv_close(cnv);
2396}
2397
2398static void
2399TestLATIN1() {
2400    /* test input */
2401    static const uint8_t in[]={
2402       0x61,
2403       0x31,
2404       0x32,
2405       0xc0,
2406       0xf0,
2407       0xf4,
2408    };
2409
2410    /* expected test results */
2411    static const int32_t results[]={
2412        /* number of bytes read, code point */
2413        1, 0x61,
2414        1, 0x31,
2415        1, 0x32,
2416        1, 0xc0,
2417        1, 0xf0,
2418        1, 0xf4,
2419    };
2420    static const uint16_t in1[] = {
2421        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2422        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2423        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2424        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2425        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2426        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2427        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2428        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2429        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2430        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2431        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2432        0xcb, 0x82
2433    };
2434    static const uint8_t out1[] = {
2435        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2436        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2437        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2438        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2439        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2440        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2441        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2442        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2443        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2444        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2445        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2446        0xcb, 0x82
2447    };
2448    static const uint16_t in2[]={
2449        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2450        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2451        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2452        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2453        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2454        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2455        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2456        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2457        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2458        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2459        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2460        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2461        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2462        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2463        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2464        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2465        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2466        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2467        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2468        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2469        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2470        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2471        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2472        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2473        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2474        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2475        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2476        0x37, 0x20, 0x2A, 0x2F,
2477    };
2478    static const unsigned char out2[]={
2479        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2480        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2481        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2482        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2483        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2484        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2485        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2486        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2487        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2488        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2489        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2490        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2491        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2492        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2493        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2494        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2495        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2496        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2497        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2498        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2499        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2500        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2501        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2502        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2503        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2504        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2505        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2506        0x37, 0x20, 0x2A, 0x2F,
2507    };
2508    const char *source=(const char *)in;
2509    const char *limit=(const char *)in+sizeof(in);
2510
2511    UErrorCode errorCode=U_ZERO_ERROR;
2512    UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2513    if(U_FAILURE(errorCode)) {
2514        log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2515        return;
2516    }
2517    TestNextUChar(cnv, source, limit, results, "LATIN_1");
2518    /* Test the condition when source >= sourceLimit */
2519    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2520    TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2521    TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2522
2523    ucnv_close(cnv);
2524}
2525
2526static void
2527TestSBCS() {
2528    /* test input */
2529    static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2530    /* expected test results */
2531    static const int32_t results[]={
2532        /* number of bytes read, code point */
2533        1, 0x61,
2534        1, 0xbf,
2535        1, 0xc4,
2536        1, 0x2021,
2537        1, 0xf8ff,
2538        1, 0x00d9
2539    };
2540
2541    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2542    UErrorCode errorCode=U_ZERO_ERROR;
2543    UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2544    if(U_FAILURE(errorCode)) {
2545        log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2546        return;
2547    }
2548    TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2549    /* Test the condition when source >= sourceLimit */
2550    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2551    /*Test for Illegal character */ /*
2552    {
2553    static const uint8_t input1[]={ 0xA1 };
2554    const char* illegalsource=(const char*)input1;
2555    TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2556    }
2557   */
2558    ucnv_close(cnv);
2559}
2560
2561static void
2562TestDBCS() {
2563    /* test input */
2564    static const uint8_t in[]={
2565        0x44, 0x6a,
2566        0xc4, 0x9c,
2567        0x7a, 0x74,
2568        0x46, 0xab,
2569        0x42, 0x5b,
2570
2571    };
2572
2573    /* expected test results */
2574    static const int32_t results[]={
2575        /* number of bytes read, code point */
2576        2, 0x00a7,
2577        2, 0xe1d2,
2578        2, 0x6962,
2579        2, 0xf842,
2580        2, 0xffe5,
2581    };
2582
2583    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2584    UErrorCode errorCode=U_ZERO_ERROR;
2585
2586    UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2587    if(U_FAILURE(errorCode)) {
2588        log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2589        return;
2590    }
2591    TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2592    /* Test the condition when source >= sourceLimit */
2593    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2594    /*Test for the condition where there is an invalid character*/
2595    {
2596        static const uint8_t source2[]={0x1a, 0x1b};
2597        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2598    }
2599    /*Test for the condition where we have a truncated char*/
2600    {
2601        static const uint8_t source1[]={0xc4};
2602        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2603        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2604    }
2605    ucnv_close(cnv);
2606}
2607
2608static void
2609TestMBCS() {
2610    /* test input */
2611    static const uint8_t in[]={
2612        0x01,
2613        0xa6, 0xa3,
2614        0x00,
2615        0xa6, 0xa1,
2616        0x08,
2617        0xc2, 0x76,
2618        0xc2, 0x78,
2619
2620    };
2621
2622    /* expected test results */
2623    static const int32_t results[]={
2624        /* number of bytes read, code point */
2625        1, 0x0001,
2626        2, 0x250c,
2627        1, 0x0000,
2628        2, 0x2500,
2629        1, 0x0008,
2630        2, 0xd60c,
2631        2, 0xd60e,
2632    };
2633
2634    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2635    UErrorCode errorCode=U_ZERO_ERROR;
2636
2637    UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2638    if(U_FAILURE(errorCode)) {
2639        log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2640        return;
2641    }
2642    TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2643    /* Test the condition when source >= sourceLimit */
2644    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2645    /*Test for the condition where there is an invalid character*/
2646    {
2647        static const uint8_t source2[]={0xa1, 0x80};
2648        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2649    }
2650    /*Test for the condition where we have a truncated char*/
2651    {
2652        static const uint8_t source1[]={0xc4};
2653        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2654        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2655    }
2656    ucnv_close(cnv);
2657
2658}
2659
2660#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2661static void
2662TestICCRunout() {
2663/*    { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2664
2665    const char *cnvName = "ibm-1363";
2666    UErrorCode status = U_ZERO_ERROR;
2667    const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2668    /* UChar   expectUData[] = { 0x00a1, 0x001a }; */
2669    const char *source = sourceData;
2670    const char *sourceLim = sourceData+sizeof(sourceData);
2671    UChar c1, c2, c3;
2672    UConverter *cnv=ucnv_open(cnvName, &status);
2673    if(U_FAILURE(status)) {
2674        log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2675	return;
2676    }
2677
2678#if 0
2679    {
2680    UChar   targetBuf[256];
2681    UChar   *target = targetBuf;
2682    UChar   *targetLim = target+256;
2683    ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2684
2685    log_info("After convert: target@%d, source@%d, status%s\n",
2686	     target-targetBuf, source-sourceData, u_errorName(status));
2687
2688    if(U_FAILURE(status)) {
2689	log_err("Failed to convert: %s\n", u_errorName(status));
2690    } else {
2691
2692    }
2693    }
2694#endif
2695
2696    c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2697    log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2698
2699    c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2700    log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2701
2702    c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2703    log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2704
2705    if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2706	log_verbose("OK\n");
2707    } else {
2708	log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2709    }
2710
2711    ucnv_close(cnv);
2712
2713}
2714#endif
2715
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * Generic ISO_2022 converter check (only built when
 * U_ENABLE_GENERIC_ISO_2022 is defined): iterates an escape sequence
 * followed by 1- to 4-byte sequences through TestNextUChar(), then probes
 * the argument-error, empty-input, truncated and invalid-character paths.
 */
static void
TestISO_2022() {
    /* test input */
    static const uint8_t in[]={
        0x1b, 0x25, 0x42,
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };

    /* expected test results */
    static const int32_t results[]={
        /* number of bytes read, code point */
        4, 0x0031,  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };

    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "ISO_2022");

    /*
     * Test the condition when sourceLimit < source.  Both pointers stay
     * inside in[]: forming source-1 would point before the array, which is
     * undefined behaviour in C.
     */
    TestNextUCharError(cnv, source+1, source, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    /* Test the condition when source == sourceLimit */
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    /*Test for the condition where we have a truncated char*/
    {
        static const uint8_t source1[]={0xc4};
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
    }
    /*Test for the condition where there is an invalid character*/
    {
        static const uint8_t source2[]={0xa1, 0x01};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
    }
    ucnv_close(cnv);
}

#endif
2773
2774static void
2775TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2776    const UChar* uSource;
2777    const UChar* uSourceLimit;
2778    const char* cSource;
2779    const char* cSourceLimit;
2780    UChar *uTargetLimit =NULL;
2781    UChar *uTarget;
2782    char *cTarget;
2783    const char *cTargetLimit;
2784    char *cBuf;
2785    UChar *uBuf; /*,*test;*/
2786    int32_t uBufSize = 120;
2787    int len=0;
2788    int i=2;
2789    UErrorCode errorCode=U_ZERO_ERROR;
2790    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2791    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2792    ucnv_reset(cnv);
2793    for(;--i>0; ){
2794        uSource = (UChar*) source;
2795        uSourceLimit=(const UChar*)sourceLimit;
2796        cTarget = cBuf;
2797        uTarget = uBuf;
2798        cSource = cBuf;
2799        cTargetLimit = cBuf;
2800        uTargetLimit = uBuf;
2801
2802        do{
2803
2804            cTargetLimit = cTargetLimit+ i;
2805            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2806            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2807               errorCode=U_ZERO_ERROR;
2808                continue;
2809            }
2810
2811            if(U_FAILURE(errorCode)){
2812                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2813                return;
2814            }
2815
2816        }while (uSource<uSourceLimit);
2817
2818        cSourceLimit =cTarget;
2819        do{
2820            uTargetLimit=uTargetLimit+i;
2821            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2822            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2823               errorCode=U_ZERO_ERROR;
2824                continue;
2825            }
2826            if(U_FAILURE(errorCode)){
2827                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2828                    return;
2829            }
2830        }while(cSource<cSourceLimit);
2831
2832        uSource = source;
2833        /*test =uBuf;*/
2834        for(len=0;len<(int)(source - sourceLimit);len++){
2835            if(uBuf[len]!=uSource[len]){
2836                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2837            }
2838        }
2839    }
2840    free(uBuf);
2841    free(cBuf);
2842}
2843/* Test for Jitterbug 778 */
2844static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2845    const UChar* uSource;
2846    const UChar* uSourceLimit;
2847    const char* cSource;
2848    UChar *uTargetLimit =NULL;
2849    UChar *uTarget;
2850    char *cTarget;
2851    const char *cTargetLimit;
2852    char *cBuf;
2853    UChar *uBuf,*test;
2854    int32_t uBufSize = 120;
2855    int numCharsInTarget=0;
2856    UErrorCode errorCode=U_ZERO_ERROR;
2857    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2858    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2859    uSource = source;
2860    uSourceLimit=sourceLimit;
2861    cTarget = cBuf;
2862    cTargetLimit = cBuf +uBufSize*5;
2863    uTarget = uBuf;
2864    uTargetLimit = uBuf+ uBufSize*5;
2865    ucnv_reset(cnv);
2866    numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2867    if(U_FAILURE(errorCode)){
2868        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2869        return;
2870    }
2871    cSource = cBuf;
2872    test =uBuf;
2873    ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2874    if(U_FAILURE(errorCode)){
2875        log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2876        return;
2877    }
2878    uSource = source;
2879    while(uSource<uSourceLimit){
2880        if(*test!=*uSource){
2881
2882            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2883        }
2884        uSource++;
2885        test++;
2886    }
2887    free(uBuf);
2888    free(cBuf);
2889}
2890
2891static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2892    const UChar* uSource;
2893    const UChar* uSourceLimit;
2894    const char* cSource;
2895    const char* cSourceLimit;
2896    UChar *uTargetLimit =NULL;
2897    UChar *uTarget;
2898    char *cTarget;
2899    const char *cTargetLimit;
2900    char *cBuf;
2901    UChar *uBuf; /*,*test;*/
2902    int32_t uBufSize = 120;
2903    int len=0;
2904    int i=2;
2905    const UChar *temp = sourceLimit;
2906    UErrorCode errorCode=U_ZERO_ERROR;
2907    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2908    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2909
2910    ucnv_reset(cnv);
2911    for(;--i>0;){
2912        uSource = (UChar*) source;
2913        cTarget = cBuf;
2914        uTarget = uBuf;
2915        cSource = cBuf;
2916        cTargetLimit = cBuf;
2917        uTargetLimit = uBuf+uBufSize*5;
2918        cTargetLimit = cTargetLimit+uBufSize*10;
2919        uSourceLimit=uSource;
2920        do{
2921
2922            if (uSourceLimit < sourceLimit) {
2923                uSourceLimit = uSourceLimit+1;
2924            }
2925            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2926            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2927               errorCode=U_ZERO_ERROR;
2928                continue;
2929            }
2930
2931            if(U_FAILURE(errorCode)){
2932                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2933                return;
2934            }
2935
2936        }while (uSource<temp);
2937
2938        cSourceLimit =cBuf;
2939        do{
2940            if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2941                cSourceLimit = cSourceLimit+1;
2942            }
2943            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2944            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2945               errorCode=U_ZERO_ERROR;
2946                continue;
2947            }
2948            if(U_FAILURE(errorCode)){
2949                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2950                    return;
2951            }
2952        }while(cSource<cTarget);
2953
2954        uSource = source;
2955        /*test =uBuf;*/
2956        for(;len<(int)(source - sourceLimit);len++){
2957            if(uBuf[len]!=uSource[len]){
2958                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2959            }
2960        }
2961    }
2962    free(uBuf);
2963    free(cBuf);
2964}
2965static void
2966TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2967                     const uint16_t results[], const char* message){
2968/*     const char* s0; */
2969     const char* s=(char*)source;
2970     const uint16_t *r=results;
2971     UErrorCode errorCode=U_ZERO_ERROR;
2972     uint32_t c,exC;
2973     ucnv_reset(cnv);
2974     while(s<limit) {
2975	 /* s0=s; */
2976        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2977        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2978            break; /* no more significant input */
2979        } else if(U_FAILURE(errorCode)) {
2980            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2981            break;
2982        } else {
2983            if(U16_IS_LEAD(*r)){
2984                int i =0, len = 2;
2985                U16_NEXT(r, i, len, exC);
2986                r++;
2987            }else{
2988                exC = *r;
2989            }
2990            if(c!=(uint32_t)(exC))
2991                log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
2992        }
2993        r++;
2994    }
2995}
2996
2997static int TestJitterbug930(const char* enc){
2998    UErrorCode err = U_ZERO_ERROR;
2999    UConverter*converter;
3000    char out[80];
3001    char*target = out;
3002    UChar in[4];
3003    const UChar*source = in;
3004    int32_t off[80];
3005    int32_t* offsets = off;
3006    int numOffWritten=0;
3007    UBool flush = 0;
3008    converter = my_ucnv_open(enc, &err);
3009
3010    in[0] = 0x41;     /* 0x4E00;*/
3011    in[1] = 0x4E01;
3012    in[2] = 0x4E02;
3013    in[3] = 0x4E03;
3014
3015    memset(off, '*', sizeof(off));
3016
3017    ucnv_fromUnicode (converter,
3018            &target,
3019            target+2,
3020            &source,
3021            source+3,
3022            offsets,
3023            flush,
3024            &err);
3025
3026        /* writes three bytes into the output buffer: 41 1B 24
3027        * but offsets contains 0 1 1
3028    */
3029    while(*offsets< off[10]){
3030        numOffWritten++;
3031        offsets++;
3032    }
3033    log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3034    if(numOffWritten!= (int)(target-out)){
3035        log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3036    }
3037
3038    err = U_ZERO_ERROR;
3039
3040    memset(off,'*' , sizeof(off));
3041
3042    flush = 1;
3043    offsets=off;
3044    ucnv_fromUnicode (converter,
3045            &target,
3046            target+4,
3047            &source,
3048            source,
3049            offsets,
3050            flush,
3051            &err);
3052    numOffWritten=0;
3053    while(*offsets< off[10]){
3054        numOffWritten++;
3055        if(*offsets!= -1){
3056            log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3057        }
3058        offsets++;
3059    }
3060
3061    /* writes 42 43 7A into output buffer,
3062     * offsets contains -1 -1 -1
3063     */
3064    ucnv_close(converter);
3065    return 0;
3066}
3067
3068static void
3069TestHZ() {
3070    /* test input */
3071    static const uint16_t in[]={
3072            0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3073            0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3074            0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3075            0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3076            0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3077            0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3078            0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3079            0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3080            0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3081            0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3082            0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3083            0x005A, 0x005B, 0x005C, 0x000A
3084      };
3085    const UChar* uSource;
3086    const UChar* uSourceLimit;
3087    const char* cSource;
3088    const char* cSourceLimit;
3089    UChar *uTargetLimit =NULL;
3090    UChar *uTarget;
3091    char *cTarget;
3092    const char *cTargetLimit;
3093    char *cBuf;
3094    UChar *uBuf,*test;
3095    int32_t uBufSize = 120;
3096    UErrorCode errorCode=U_ZERO_ERROR;
3097    UConverter *cnv;
3098    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3099    int32_t* myOff= offsets;
3100    cnv=ucnv_open("HZ", &errorCode);
3101    if(U_FAILURE(errorCode)) {
3102        log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3103        return;
3104    }
3105
3106    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3107    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3108    uSource = (const UChar*)in;
3109    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3110    cTarget = cBuf;
3111    cTargetLimit = cBuf +uBufSize*5;
3112    uTarget = uBuf;
3113    uTargetLimit = uBuf+ uBufSize*5;
3114    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3115    if(U_FAILURE(errorCode)){
3116        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3117        return;
3118    }
3119    cSource = cBuf;
3120    cSourceLimit =cTarget;
3121    test =uBuf;
3122    myOff=offsets;
3123    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3124    if(U_FAILURE(errorCode)){
3125        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3126        return;
3127    }
3128    uSource = (const UChar*)in;
3129    while(uSource<uSourceLimit){
3130        if(*test!=*uSource){
3131
3132            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3133        }
3134        uSource++;
3135        test++;
3136    }
3137    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3138    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3139    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3140    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3141    TestJitterbug930("csISO2022JP");
3142    ucnv_close(cnv);
3143    free(offsets);
3144    free(uBuf);
3145    free(cBuf);
3146}
3147
3148static void
3149TestISCII(){
3150        /* test input */
3151    static const uint16_t in[]={
3152        /* test full range of Devanagari */
3153        0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3154        0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3155        0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3156        0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3157        0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3158        0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3159        0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3160        0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3161        0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3162        0x096D,0x096E,0x096F,
3163        /* test Soft halant*/
3164        0x0915,0x094d, 0x200D,
3165        /* test explicit halant */
3166        0x0915,0x094d, 0x200c,
3167        /* test double danda */
3168        0x965,
3169        /* test ASCII */
3170        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3171        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3172        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3173        /* tests from Lotus */
3174        0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3175        0x0930,0x094D,0x200D,
3176        0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3177        0x0915,0x0921,0x002B,0x095F,
3178        /* tamil range */
3179        0x0B86, 0xB87, 0xB88,
3180        /* telugu range */
3181        0x0C05, 0x0C02, 0x0C03,0x0c31,
3182        /* kannada range */
3183        0x0C85, 0xC82, 0x0C83,
3184        /* test Abbr sign and Anudatta */
3185        0x0970, 0x952,
3186       /* 0x0958,
3187        0x0959,
3188        0x095A,
3189        0x095B,
3190        0x095C,
3191        0x095D,
3192        0x095E,
3193        0x095F,*/
3194        0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3195        0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3196        0x090C ,
3197        0x0962,
3198        0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3199        0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3200        0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3201        0x093D /* Avagraha  0xEA, 0xE9*/,
3202        0x0958,
3203        0x0959,
3204        0x095A,
3205        0x095B,
3206        0x095C,
3207        0x095D,
3208        0x095E,
3209        0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3210      };
3211    static const unsigned char byteArr[]={
3212
3213        0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3214        0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3215        0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3216        0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3217        0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3218        0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3219        0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3220        0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3221        0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3222        0xf8,0xf9,0xfa,
3223        /* test soft halant */
3224        0xb3, 0xE8, 0xE9,
3225        /* test explicit halant */
3226        0xb3, 0xE8, 0xE8,
3227        /* test double danda */
3228        0xea, 0xea,
3229        /* test ASCII */
3230        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3231        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3232        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3233        /* test ATR code */
3234
3235        /* tests from Lotus */
3236        0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3237        0xEF,0x42,0xCF,0xE8,0xD9,
3238        0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3239        0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3240        /* tamil range */
3241        0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3242        /* telugu range */
3243        0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3244        /* kannada range */
3245        0xEF, 0x48,0xa4, 0xa2, 0xa3,
3246        /* anudatta and abbreviation sign */
3247        0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3248
3249
3250        0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3251
3252        0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3253
3254        0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3255
3256        0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3257
3258        0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3259
3260        0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3261
3262        0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3263
3264        0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3265
3266        0xB3, 0xE9, /* Ka + NUKTA */
3267
3268        0xB4, 0xE9, /* Kha + NUKTA */
3269
3270        0xB5, 0xE9, /* Ga + NUKTA */
3271
3272        0xBA, 0xE9,
3273
3274        0xBF, 0xE9,
3275
3276        0xC0, 0xE9,
3277
3278        0xC9, 0xE9,
3279        /* INV halant RA    */
3280        0xD9, 0xE8, 0xCF,
3281        0x00, 0x00A0,
3282        /* just consume unhandled codepoints */
3283        0xEF, 0x30,
3284
3285    };
3286    testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
3287    TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3288
3289}
3290
3291static void
3292TestISO_2022_JP() {
3293    /* test input */
3294    static const uint16_t in[]={
3295        0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3296        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3297        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3298        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3299        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3300        0x201D, 0x3014, 0x000D, 0x000A,
3301        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3302        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3303        };
3304    const UChar* uSource;
3305    const UChar* uSourceLimit;
3306    const char* cSource;
3307    const char* cSourceLimit;
3308    UChar *uTargetLimit =NULL;
3309    UChar *uTarget;
3310    char *cTarget;
3311    const char *cTargetLimit;
3312    char *cBuf;
3313    UChar *uBuf,*test;
3314    int32_t uBufSize = 120;
3315    UErrorCode errorCode=U_ZERO_ERROR;
3316    UConverter *cnv;
3317    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3318    int32_t* myOff= offsets;
3319    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3320    if(U_FAILURE(errorCode)) {
3321        log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3322        return;
3323    }
3324
3325    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3326    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3327    uSource = (const UChar*)in;
3328    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3329    cTarget = cBuf;
3330    cTargetLimit = cBuf +uBufSize*5;
3331    uTarget = uBuf;
3332    uTargetLimit = uBuf+ uBufSize*5;
3333    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3334    if(U_FAILURE(errorCode)){
3335        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3336        return;
3337    }
3338    cSource = cBuf;
3339    cSourceLimit =cTarget;
3340    test =uBuf;
3341    myOff=offsets;
3342    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3343    if(U_FAILURE(errorCode)){
3344        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3345        return;
3346    }
3347
3348    uSource = (const UChar*)in;
3349    while(uSource<uSourceLimit){
3350        if(*test!=*uSource){
3351
3352            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3353        }
3354        uSource++;
3355        test++;
3356    }
3357
3358    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3359    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3360    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3361    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3362    TestJitterbug930("csISO2022JP");
3363    ucnv_close(cnv);
3364    free(uBuf);
3365    free(cBuf);
3366    free(offsets);
3367}
3368
3369static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3370    const UChar* uSource;
3371    const UChar* uSourceLimit;
3372    const char* cSource;
3373    const char* cSourceLimit;
3374    UChar *uTargetLimit =NULL;
3375    UChar *uTarget;
3376    char *cTarget;
3377    const char *cTargetLimit;
3378    char *cBuf;
3379    UChar *uBuf,*test;
3380    int32_t uBufSize = 120*10;
3381    UErrorCode errorCode=U_ZERO_ERROR;
3382    UConverter *cnv;
3383    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3384    int32_t* myOff= offsets;
3385    cnv=my_ucnv_open(conv, &errorCode);
3386    if(U_FAILURE(errorCode)) {
3387        log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3388        return;
3389    }
3390
3391    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3392    cBuf =(char*)malloc(uBufSize * sizeof(char));
3393    uSource = (const UChar*)in;
3394    uSourceLimit=uSource+len;
3395    cTarget = cBuf;
3396    cTargetLimit = cBuf +uBufSize;
3397    uTarget = uBuf;
3398    uTargetLimit = uBuf+ uBufSize;
3399    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3400    if(U_FAILURE(errorCode)){
3401        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3402        return;
3403    }
3404    /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3405    cSource = cBuf;
3406    cSourceLimit =cTarget;
3407    test =uBuf;
3408    myOff=offsets;
3409    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3410    if(U_FAILURE(errorCode)){
3411        log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3412        return;
3413    }
3414
3415    uSource = (const UChar*)in;
3416    while(uSource<uSourceLimit){
3417        if(*test!=*uSource){
3418            log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3419        }
3420        uSource++;
3421        test++;
3422    }
3423    TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3424    TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3425    TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3426    if(byteArr && byteArrLen!=0){
3427        TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3428        TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3429        {
3430            cSource = byteArr;
3431            cSourceLimit = cSource+byteArrLen;
3432            test=uBuf;
3433            myOff = offsets;
3434            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3435            if(U_FAILURE(errorCode)){
3436                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3437                return;
3438            }
3439
3440            uSource = (const UChar*)in;
3441            while(uSource<uSourceLimit){
3442                if(*test!=*uSource){
3443                    log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3444                }
3445                uSource++;
3446                test++;
3447            }
3448        }
3449    }
3450
3451    ucnv_close(cnv);
3452    free(uBuf);
3453    free(cBuf);
3454    free(offsets);
3455}
3456static UChar U_CALLCONV
3457_charAt(int32_t offset, void *context) {
3458    return ((char*)context)[offset];
3459}
3460
3461static int32_t
3462unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3463    int32_t srcIndex=0;
3464    int32_t dstIndex=0;
3465    if(U_FAILURE(*status)){
3466        return 0;
3467    }
3468    if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3469        *status = U_ILLEGAL_ARGUMENT_ERROR;
3470        return 0;
3471    }
3472    if(srcLen==-1){
3473        srcLen = (int32_t)uprv_strlen(src);
3474    }
3475
3476    for (; srcIndex<srcLen; ) {
3477        UChar32 c = src[srcIndex++];
3478        if (c == 0x005C /*'\\'*/) {
3479            c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3480            if (c == (UChar32)0xFFFFFFFF) {
3481                *status=U_INVALID_CHAR_FOUND; /* return empty string */
3482                break; /* invalid escape sequence */
3483            }
3484        }
3485        if(dstIndex < dstLen){
3486            if(c>0xFFFF){
3487               dst[dstIndex++] = U16_LEAD(c);
3488               if(dstIndex<dstLen){
3489                    dst[dstIndex]=U16_TRAIL(c);
3490               }else{
3491                   *status=U_BUFFER_OVERFLOW_ERROR;
3492               }
3493            }else{
3494                dst[dstIndex]=(UChar)c;
3495            }
3496
3497        }else{
3498            *status = U_BUFFER_OVERFLOW_ERROR;
3499        }
3500        dstIndex++; /* for preflighting */
3501    }
3502    return dstIndex;
3503}
3504
3505static void
3506TestFullRoundtrip(const char* cp){
3507    UChar usource[10] ={0};
3508    UChar nsrc[10] = {0};
3509    uint32_t i=1;
3510    int len=0, ulen;
3511    nsrc[0]=0x0061;
3512    /* Test codepoint 0 */
3513    TestConv(usource,1,cp,"",NULL,0);
3514    TestConv(usource,2,cp,"",NULL,0);
3515    nsrc[2]=0x5555;
3516    TestConv(nsrc,3,cp,"",NULL,0);
3517
3518    for(;i<=0x10FFFF;i++){
3519        if(i==0xD800){
3520            i=0xDFFF;
3521            continue;
3522        }
3523        if(i<=0xFFFF){
3524            usource[0] =(UChar) i;
3525            len=1;
3526        }else{
3527            usource[0]=U16_LEAD(i);
3528            usource[1]=U16_TRAIL(i);
3529            len=2;
3530        }
3531        ulen=len;
3532        if(i==0x80) {
3533            usource[2]=0;
3534        }
3535        /* Test only single code points */
3536        TestConv(usource,ulen,cp,"",NULL,0);
3537        /* Test codepoint repeated twice */
3538        usource[ulen]=usource[0];
3539        usource[ulen+1]=usource[1];
3540        ulen+=len;
3541        TestConv(usource,ulen,cp,"",NULL,0);
3542        /* Test codepoint repeated 3 times */
3543        usource[ulen]=usource[0];
3544        usource[ulen+1]=usource[1];
3545        ulen+=len;
3546        TestConv(usource,ulen,cp,"",NULL,0);
3547        /* Test codepoint in between 2 codepoints */
3548        nsrc[1]=usource[0];
3549        nsrc[2]=usource[1];
3550        nsrc[len+1]=0x5555;
3551        TestConv(nsrc,len+2,cp,"",NULL,0);
3552        uprv_memset(usource,0,sizeof(UChar)*10);
3553    }
3554}
3555
3556static void
3557TestRoundTrippingAllUTF(void){
3558    if(!getTestOption(QUICK_OPTION)){
3559        log_verbose("Running exhaustive round trip test for BOCU-1\n");
3560        TestFullRoundtrip("BOCU-1");
3561        log_verbose("Running exhaustive round trip test for SCSU\n");
3562        TestFullRoundtrip("SCSU");
3563        log_verbose("Running exhaustive round trip test for UTF-8\n");
3564        TestFullRoundtrip("UTF-8");
3565        log_verbose("Running exhaustive round trip test for CESU-8\n");
3566        TestFullRoundtrip("CESU-8");
3567        log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3568        TestFullRoundtrip("UTF-16BE");
3569        log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3570        TestFullRoundtrip("UTF-16LE");
3571        log_verbose("Running exhaustive round trip test for UTF-16\n");
3572        TestFullRoundtrip("UTF-16");
3573        log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3574        TestFullRoundtrip("UTF-32BE");
3575        log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3576        TestFullRoundtrip("UTF-32LE");
3577        log_verbose("Running exhaustive round trip test for UTF-32\n");
3578        TestFullRoundtrip("UTF-32");
3579        log_verbose("Running exhaustive round trip test for UTF-7\n");
3580        TestFullRoundtrip("UTF-7");
3581        log_verbose("Running exhaustive round trip test for UTF-7\n");
3582        TestFullRoundtrip("UTF-7,version=1");
3583        log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3584        TestFullRoundtrip("IMAP-mailbox-name");
3585        /*
3586         *
3587         * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3588         * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3589         * The old mappings remain as fallbacks.
3590         * This test may be reintroduced at a later time.
3591         *
3592         * 110118 - mow
3593         */
3594         /*
3595         log_verbose("Running exhaustive round trip test for GB18030\n");
3596         TestFullRoundtrip("GB18030");
3597         */
3598    }
3599}
3600
3601static void
3602TestSCSU() {
3603
3604    static const uint16_t germanUTF16[]={
3605        0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3606    };
3607
3608    static const uint8_t germanSCSU[]={
3609        0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3610    };
3611
3612    static const uint16_t russianUTF16[]={
3613        0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3614    };
3615
3616    static const uint8_t russianSCSU[]={
3617        0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3618    };
3619
3620    static const uint16_t japaneseUTF16[]={
3621        0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3622        0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3623        0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3624        0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3625        0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3626        0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3627        0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3628        0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3629        0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3630        0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3631        0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3632        0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3633        0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3634        0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3635        0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3636    };
3637
3638    /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3639     it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3640    static const uint8_t japaneseSCSU[]={
3641        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3642        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3643        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3644        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3645        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3646        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3647        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3648        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3649        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3650        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3651        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3652        0xcb, 0x82
3653    };
3654
3655    static const uint16_t allFeaturesUTF16[]={
3656        0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3657        0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3658        0x01df, 0xf000, 0xdbff, 0xdfff
3659    };
3660
3661    /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3662     * result here (34B vs. 35B)
3663     */
3664    static const uint8_t allFeaturesSCSU[]={
3665        0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3666        0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3667        0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3668        0xdf, 0x14, 0x80, 0x15, 0xff
3669    };
3670    static const uint16_t monkeyIn[]={
3671        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3672        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3673        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3674        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3675        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3676        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3677        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3678        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3679        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3680        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3681        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3682        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3683        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3684        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3685        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3686        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3687        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3688        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3689        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3690        /* test non-BMP code points */
3691        0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3692        0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3693        0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3694        0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3695        0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3696        0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3697        0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3698        0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3699        0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3700        0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3701        0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3702
3703
3704        0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3705        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3706        0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3707        0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3708        0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3709    };
3710    static const char *fTestCases [] = {
3711          "\\ud800\\udc00", /* smallest surrogate*/
3712          "\\ud8ff\\udcff",
3713          "\\udBff\\udFff", /* largest surrogate pair*/
3714          "\\ud834\\udc00",
3715          "\\U0010FFFF",
3716          "Hello \\u9292 \\u9192 World!",
3717          "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3718          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3719
3720          "\\u0648\\u06c8", /* catch missing reset*/
3721          "\\u0648\\u06c8",
3722
3723          "\\u4444\\uE001", /* lowest quotable*/
3724          "\\u4444\\uf2FF", /* highest quotable*/
3725          "\\u4444\\uf188\\u4444",
3726          "\\u4444\\uf188\\uf288",
3727          "\\u4444\\uf188abc\\u0429\\uf288",
3728          "\\u9292\\u2222",
3729          "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3730          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3731          "Hello World!123456",
3732          "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3733
3734          "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3735          "abc\\u4411d",      /* uses SQU*/
3736          "abc\\u4411\\u4412d",/* uses SCU*/
3737          "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3738          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3739          "\\u9292\\u2222",
3740          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3741          "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3742          "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3743
3744          "", /* empty input*/
3745          "\\u0000", /* smallest BMP character*/
3746          "\\uFFFF", /* largest BMP character*/
3747
3748          /* regression tests*/
3749          "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3750          "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3751          "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3752          "\\u0041\\u00df\\u0401\\u015f",
3753          "\\u9066\\u2123abc",
3754          "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3755          "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3756    };
3757    int i=0;
3758    for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
3759        const char* cSrc = fTestCases[i];
3760        UErrorCode status = U_ZERO_ERROR;
3761        int32_t cSrcLen,srcLen;
3762        UChar* src;
3763        /* UConverter* cnv = ucnv_open("SCSU",&status); */
3764        cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3765        src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3766        srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3767        log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3768        TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3769        free(src);
3770    }
3771    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3772    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3773    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3774    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3775    TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3776    TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3777    TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3778}
3779
3780#if !UCONFIG_NO_LEGACY_CONVERSION
3781static void TestJitterbug2346(){
3782    char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3783                      0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3784    uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3785
3786    UChar uTarget[500]={'\0'};
3787    UChar* utarget=uTarget;
3788    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3789
3790    char cTarget[500]={'\0'};
3791    char* ctarget=cTarget;
3792    char* ctargetLimit=cTarget+sizeof(cTarget);
3793    const char* csource=source;
3794    UChar* temp = expected;
3795    UErrorCode err=U_ZERO_ERROR;
3796
3797    UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3798    if(U_FAILURE(err)) {
3799        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3800        return;
3801    }
3802    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3803    if(U_FAILURE(err)) {
3804        log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3805        return;
3806    }
3807    utargetLimit=utarget;
3808    utarget = uTarget;
3809    while(utarget<utargetLimit){
3810        if(*temp!=*utarget){
3811
3812            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3813        }
3814        utarget++;
3815        temp++;
3816    }
3817    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3818    if(U_FAILURE(err)) {
3819        log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3820        return;
3821    }
3822    ctargetLimit=ctarget;
3823    ctarget =cTarget;
3824    ucnv_close(conv);
3825
3826
3827}
3828
3829static void
3830TestISO_2022_JP_1() {
3831    /* test input */
3832    static const uint16_t in[]={
3833        0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3834        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3835        0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3836        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3837        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3838        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3839        0x201D, 0x000D, 0x000A,
3840        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3841        0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3842        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3843        0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3844        0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3845        0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3846      };
3847    const UChar* uSource;
3848    const UChar* uSourceLimit;
3849    const char* cSource;
3850    const char* cSourceLimit;
3851    UChar *uTargetLimit =NULL;
3852    UChar *uTarget;
3853    char *cTarget;
3854    const char *cTargetLimit;
3855    char *cBuf;
3856    UChar *uBuf,*test;
3857    int32_t uBufSize = 120;
3858    UErrorCode errorCode=U_ZERO_ERROR;
3859    UConverter *cnv;
3860
3861    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3862    if(U_FAILURE(errorCode)) {
3863        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3864        return;
3865    }
3866
3867    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3868    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3869    uSource = (const UChar*)in;
3870    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3871    cTarget = cBuf;
3872    cTargetLimit = cBuf +uBufSize*5;
3873    uTarget = uBuf;
3874    uTargetLimit = uBuf+ uBufSize*5;
3875    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3876    if(U_FAILURE(errorCode)){
3877        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3878        return;
3879    }
3880    cSource = cBuf;
3881    cSourceLimit =cTarget;
3882    test =uBuf;
3883    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3884    if(U_FAILURE(errorCode)){
3885        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3886        return;
3887    }
3888    uSource = (const UChar*)in;
3889    while(uSource<uSourceLimit){
3890        if(*test!=*uSource){
3891
3892            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3893        }
3894        uSource++;
3895        test++;
3896    }
3897    /*ucnv_close(cnv);
3898    cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3899    /*Test for the condition where there is an invalid character*/
3900    ucnv_reset(cnv);
3901    {
3902        static const uint8_t source2[]={0x0e,0x24,0x053};
3903        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3904    }
3905    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3906    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3907    ucnv_close(cnv);
3908    free(uBuf);
3909    free(cBuf);
3910}
3911
3912static void
3913TestISO_2022_JP_2() {
3914    /* test input */
3915    static const uint16_t in[]={
3916        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3917        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3918        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3919        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3920        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3921        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3922        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3923        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3924        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3925        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3926        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3927        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3928        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3929        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3930        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3931        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3932        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3933        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3934        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3935      };
3936    const UChar* uSource;
3937    const UChar* uSourceLimit;
3938    const char* cSource;
3939    const char* cSourceLimit;
3940    UChar *uTargetLimit =NULL;
3941    UChar *uTarget;
3942    char *cTarget;
3943    const char *cTargetLimit;
3944    char *cBuf;
3945    UChar *uBuf,*test;
3946    int32_t uBufSize = 120;
3947    UErrorCode errorCode=U_ZERO_ERROR;
3948    UConverter *cnv;
3949    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3950    int32_t* myOff= offsets;
3951    cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3952    if(U_FAILURE(errorCode)) {
3953        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3954        return;
3955    }
3956
3957    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3958    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3959    uSource = (const UChar*)in;
3960    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3961    cTarget = cBuf;
3962    cTargetLimit = cBuf +uBufSize*5;
3963    uTarget = uBuf;
3964    uTargetLimit = uBuf+ uBufSize*5;
3965    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3966    if(U_FAILURE(errorCode)){
3967        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3968        return;
3969    }
3970    cSource = cBuf;
3971    cSourceLimit =cTarget;
3972    test =uBuf;
3973    myOff=offsets;
3974    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3975    if(U_FAILURE(errorCode)){
3976        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3977        return;
3978    }
3979    uSource = (const UChar*)in;
3980    while(uSource<uSourceLimit){
3981        if(*test!=*uSource){
3982
3983            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3984        }
3985        uSource++;
3986        test++;
3987    }
3988    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3989    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3990    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3991    /*Test for the condition where there is an invalid character*/
3992    ucnv_reset(cnv);
3993    {
3994        static const uint8_t source2[]={0x0e,0x24,0x053};
3995        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3996    }
3997    ucnv_close(cnv);
3998    free(uBuf);
3999    free(cBuf);
4000    free(offsets);
4001}
4002
4003static void
4004TestISO_2022_KR() {
4005    /* test input */
4006    static const uint16_t in[]={
4007                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4008                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4009                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4010                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4011                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4012                   ,0x53E3,0x53E4,0x000A,0x000D};
4013    const UChar* uSource;
4014    const UChar* uSourceLimit;
4015    const char* cSource;
4016    const char* cSourceLimit;
4017    UChar *uTargetLimit =NULL;
4018    UChar *uTarget;
4019    char *cTarget;
4020    const char *cTargetLimit;
4021    char *cBuf;
4022    UChar *uBuf,*test;
4023    int32_t uBufSize = 120;
4024    UErrorCode errorCode=U_ZERO_ERROR;
4025    UConverter *cnv;
4026    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4027    int32_t* myOff= offsets;
4028    cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4029    if(U_FAILURE(errorCode)) {
4030        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4031        return;
4032    }
4033
4034    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4035    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4036    uSource = (const UChar*)in;
4037    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4038    cTarget = cBuf;
4039    cTargetLimit = cBuf +uBufSize*5;
4040    uTarget = uBuf;
4041    uTargetLimit = uBuf+ uBufSize*5;
4042    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4043    if(U_FAILURE(errorCode)){
4044        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4045        return;
4046    }
4047    cSource = cBuf;
4048    cSourceLimit =cTarget;
4049    test =uBuf;
4050    myOff=offsets;
4051    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4052    if(U_FAILURE(errorCode)){
4053        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4054        return;
4055    }
4056    uSource = (const UChar*)in;
4057    while(uSource<uSourceLimit){
4058        if(*test!=*uSource){
4059            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4060        }
4061        uSource++;
4062        test++;
4063    }
4064    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4065    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4066    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4067    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4068    TestJitterbug930("csISO2022KR");
4069    /*Test for the condition where there is an invalid character*/
4070    ucnv_reset(cnv);
4071    {
4072        static const uint8_t source2[]={0x1b,0x24,0x053};
4073        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4074        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4075    }
4076    ucnv_close(cnv);
4077    free(uBuf);
4078    free(cBuf);
4079    free(offsets);
4080}
4081
4082static void
4083TestISO_2022_KR_1() {
4084    /* test input */
4085    static const uint16_t in[]={
4086                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4087                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4088                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4089                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4090                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4091                   ,0x53E3,0x53E4,0x000A,0x000D};
4092    const UChar* uSource;
4093    const UChar* uSourceLimit;
4094    const char* cSource;
4095    const char* cSourceLimit;
4096    UChar *uTargetLimit =NULL;
4097    UChar *uTarget;
4098    char *cTarget;
4099    const char *cTargetLimit;
4100    char *cBuf;
4101    UChar *uBuf,*test;
4102    int32_t uBufSize = 120;
4103    UErrorCode errorCode=U_ZERO_ERROR;
4104    UConverter *cnv;
4105    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4106    int32_t* myOff= offsets;
4107    cnv=ucnv_open("ibm-25546", &errorCode);
4108    if(U_FAILURE(errorCode)) {
4109        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4110        return;
4111    }
4112
4113    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4114    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4115    uSource = (const UChar*)in;
4116    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4117    cTarget = cBuf;
4118    cTargetLimit = cBuf +uBufSize*5;
4119    uTarget = uBuf;
4120    uTargetLimit = uBuf+ uBufSize*5;
4121    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4122    if(U_FAILURE(errorCode)){
4123        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4124        return;
4125    }
4126    cSource = cBuf;
4127    cSourceLimit =cTarget;
4128    test =uBuf;
4129    myOff=offsets;
4130    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4131    if(U_FAILURE(errorCode)){
4132        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4133        return;
4134    }
4135    uSource = (const UChar*)in;
4136    while(uSource<uSourceLimit){
4137        if(*test!=*uSource){
4138            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4139        }
4140        uSource++;
4141        test++;
4142    }
4143    ucnv_reset(cnv);
4144    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4145    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4146    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4147    ucnv_reset(cnv);
4148    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4149        /*Test for the condition where there is an invalid character*/
4150    ucnv_reset(cnv);
4151    {
4152        static const uint8_t source2[]={0x1b,0x24,0x053};
4153        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4154        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4155    }
4156    ucnv_close(cnv);
4157    free(uBuf);
4158    free(cBuf);
4159    free(offsets);
4160}
4161
4162static void TestJitterbug2411(){
4163    static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4164                         "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4165    UConverter* kr=NULL, *kr1=NULL;
4166    UErrorCode errorCode = U_ZERO_ERROR;
4167    UChar tgt[100]={'\0'};
4168    UChar* target = tgt;
4169    UChar* targetLimit = target+100;
4170    kr=ucnv_open("iso-2022-kr", &errorCode);
4171    if(U_FAILURE(errorCode)) {
4172        log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4173        return;
4174    }
4175    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4176    if(U_FAILURE(errorCode)) {
4177        log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4178        return;
4179    }
4180    kr1 = ucnv_open("ibm-25546", &errorCode);
4181    if(U_FAILURE(errorCode)) {
4182        log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4183        return;
4184    }
4185    target = tgt;
4186    targetLimit = target+100;
4187    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4188
4189    if(U_FAILURE(errorCode)) {
4190        log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4191        return;
4192    }
4193
4194    ucnv_close(kr);
4195    ucnv_close(kr1);
4196
4197}
4198
4199static void
4200TestJIS(){
4201    /* From Unicode moved to testdata/conversion.txt */
4202    /*To Unicode*/
4203    {
4204        static const uint8_t sampleTextJIS[] = {
4205            0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4206            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4207            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4208        };
4209        static const uint16_t expectedISO2022JIS[] = {
4210            0x0041, 0x0042,
4211            0xFF81, 0xFF82,
4212            0x3000
4213        };
4214        static const int32_t  toISO2022JISOffs[]={
4215            3,4,
4216            8,9,
4217            16
4218        };
4219
4220        static const uint8_t sampleTextJIS7[] = {
4221            0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4222            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4223            0x1b,0x24,0x42,0x21,0x21,
4224            0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4225            0x21,0x22,
4226            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4227        };
4228        static const uint16_t expectedISO2022JIS7[] = {
4229            0x0041, 0x0042,
4230            0xFF81, 0xFF82,
4231            0x3000,
4232            0xFF81, 0xFF82,
4233            0x3001,
4234            0x3000
4235        };
4236        static const int32_t  toISO2022JIS7Offs[]={
4237            3,4,
4238            8,9,
4239            13,16,
4240            17,
4241            19,27
4242        };
4243        static const uint8_t sampleTextJIS8[] = {
4244            0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4245            0xa1,0xc8,0xd9,/*Katakana Set*/
4246            0x1b,0x28,0x42,
4247            0x41,0x42,
4248            0xb1,0xc3, /*Katakana Set*/
4249            0x1b,0x24,0x42,0x21,0x21
4250        };
4251        static const uint16_t expectedISO2022JIS8[] = {
4252            0x0041, 0x0042,
4253            0xff61, 0xff88, 0xff99,
4254            0x0041, 0x0042,
4255            0xff71, 0xff83,
4256            0x3000
4257        };
4258        static const int32_t  toISO2022JIS8Offs[]={
4259            3, 4,  5,  6,
4260            7, 11, 12, 13,
4261            14, 18,
4262        };
4263
4264        testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4265            sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4266        testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4267            sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4268        testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4269            sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4270    }
4271
4272}
4273
4274
4275#if 0
/* ICU 4.4 (ticket #7314) removed the mappings for CNS 11643 planes 3..7; the tests in this #if 0 region are compiled out. */
4277
4278static void TestJitterbug915(){
4279/* tests for roundtripping of the below sequence
4280\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4281\x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4282\x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4283\x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4284\x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4285\x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4286\x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4287*/
4288    static const char cSource[]={
4289        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4290        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4291        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4292        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4293        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4294        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4295        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4296        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4297        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4298        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4299        0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4300        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4301        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4302        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4303        0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4304        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4305        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4306        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4307        0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4308        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4309        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4310        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4311        0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4312        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4313        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4314        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4315        0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4316        0x37, 0x20, 0x2A, 0x2F
4317    };
4318    UChar uTarget[500]={'\0'};
4319    UChar* utarget=uTarget;
4320    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4321
4322    char cTarget[500]={'\0'};
4323    char* ctarget=cTarget;
4324    char* ctargetLimit=cTarget+sizeof(cTarget);
4325    const char* csource=cSource;
4326    const char* tempSrc = cSource;
4327    UErrorCode err=U_ZERO_ERROR;
4328
4329    UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4330    if(U_FAILURE(err)) {
4331        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4332        return;
4333    }
4334    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4335    if(U_FAILURE(err)) {
4336        log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4337        return;
4338    }
4339    utargetLimit=utarget;
4340    utarget = uTarget;
4341    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4342    if(U_FAILURE(err)) {
4343        log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4344        return;
4345    }
4346    ctargetLimit=ctarget;
4347    ctarget =cTarget;
4348    while(ctarget<ctargetLimit){
4349        if(*ctarget != *tempSrc){
4350            log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4351        }
4352        ++ctarget;
4353        ++tempSrc;
4354    }
4355
4356    ucnv_close(conv);
4357}
4358
4359static void
4360TestISO_2022_CN_EXT() {
4361    /* test input */
4362    static const uint16_t in[]={
4363                /* test Non-BMP code points */
4364         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4365         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4366         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4367         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4368         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4369         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4370         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4371         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4372         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4373         0xD869, 0xDED5,
4374
4375         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4376         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4377         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4378         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4379         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4380         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4381         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4382         0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4383         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4384         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4385         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4386         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4387         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4388         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4389         0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4390         0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4391         0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4392         0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4393
4394         0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4395
4396      };
4397
4398    const UChar* uSource;
4399    const UChar* uSourceLimit;
4400    const char* cSource;
4401    const char* cSourceLimit;
4402    UChar *uTargetLimit =NULL;
4403    UChar *uTarget;
4404    char *cTarget;
4405    const char *cTargetLimit;
4406    char *cBuf;
4407    UChar *uBuf,*test;
4408    int32_t uBufSize = 180;
4409    UErrorCode errorCode=U_ZERO_ERROR;
4410    UConverter *cnv;
4411    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4412    int32_t* myOff= offsets;
4413    cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4414    if(U_FAILURE(errorCode)) {
4415        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4416        return;
4417    }
4418
4419    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4420    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4421    uSource = (const UChar*)in;
4422    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4423    cTarget = cBuf;
4424    cTargetLimit = cBuf +uBufSize*5;
4425    uTarget = uBuf;
4426    uTargetLimit = uBuf+ uBufSize*5;
4427    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4428    if(U_FAILURE(errorCode)){
4429        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4430        return;
4431    }
4432    cSource = cBuf;
4433    cSourceLimit =cTarget;
4434    test =uBuf;
4435    myOff=offsets;
4436    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4437    if(U_FAILURE(errorCode)){
4438        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4439        return;
4440    }
4441    uSource = (const UChar*)in;
4442    while(uSource<uSourceLimit){
4443        if(*test!=*uSource){
4444            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4445        }
4446        else{
4447            log_verbose("      Got: \\u%04X\n",(int)*test) ;
4448        }
4449        uSource++;
4450        test++;
4451    }
4452    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4453    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4454    /*Test for the condition where there is an invalid character*/
4455    ucnv_reset(cnv);
4456    {
4457        static const uint8_t source2[]={0x0e,0x24,0x053};
4458        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4459    }
4460    ucnv_close(cnv);
4461    free(uBuf);
4462    free(cBuf);
4463    free(offsets);
4464}
4465#endif
4466
4467static void
4468TestISO_2022_CN() {
4469    /* test input */
4470    static const uint16_t in[]={
4471         /* jitterbug 951 */
4472         0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4473         0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4474         0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4475         0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4476         0x0020, 0x0045, 0x004e, 0x0044,
4477         /**/
4478         0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4479         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4480         0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4481         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4482         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4483         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4484         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4485         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4486         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4487         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4488         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4489         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4490         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4491         0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4492         0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4493         0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4494         0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4495
4496      };
4497    const UChar* uSource;
4498    const UChar* uSourceLimit;
4499    const char* cSource;
4500    const char* cSourceLimit;
4501    UChar *uTargetLimit =NULL;
4502    UChar *uTarget;
4503    char *cTarget;
4504    const char *cTargetLimit;
4505    char *cBuf;
4506    UChar *uBuf,*test;
4507    int32_t uBufSize = 180;
4508    UErrorCode errorCode=U_ZERO_ERROR;
4509    UConverter *cnv;
4510    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4511    int32_t* myOff= offsets;
4512    cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4513    if(U_FAILURE(errorCode)) {
4514        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4515        return;
4516    }
4517
4518    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4519    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4520    uSource = (const UChar*)in;
4521    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4522    cTarget = cBuf;
4523    cTargetLimit = cBuf +uBufSize*5;
4524    uTarget = uBuf;
4525    uTargetLimit = uBuf+ uBufSize*5;
4526    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4527    if(U_FAILURE(errorCode)){
4528        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4529        return;
4530    }
4531    cSource = cBuf;
4532    cSourceLimit =cTarget;
4533    test =uBuf;
4534    myOff=offsets;
4535    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4536    if(U_FAILURE(errorCode)){
4537        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4538        return;
4539    }
4540    uSource = (const UChar*)in;
4541    while(uSource<uSourceLimit){
4542        if(*test!=*uSource){
4543            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4544        }
4545        else{
4546            log_verbose("      Got: \\u%04X\n",(int)*test) ;
4547        }
4548        uSource++;
4549        test++;
4550    }
4551    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4552    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4553    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4554    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4555    TestJitterbug930("csISO2022CN");
4556    /*Test for the condition where there is an invalid character*/
4557    ucnv_reset(cnv);
4558    {
4559        static const uint8_t source2[]={0x0e,0x24,0x053};
4560        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4561    }
4562
4563    ucnv_close(cnv);
4564    free(uBuf);
4565    free(cBuf);
4566    free(offsets);
4567}
4568
/*
 * One case for the empty-segment tests on ISO-2022-JP/KR/CN and HZ, which
 * check that the UConverterCallbackReason is UCNV_IRREGULAR.
 */
typedef struct {
    const char *converterName;    /* converter to open for this case */
    const char *inputText;        /* bytes to convert */
    int         inputTextLength;  /* number of bytes in inputText */
} EmptySegmentTest;
4575
4576/* Callback for TestJitterbug6175, should only get called for empty segment errors */
4577static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4578                                             int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4579    if (reason > UCNV_IRREGULAR) {
4580        return;
4581    }
4582    if (reason != UCNV_IRREGULAR) {
4583        log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4584    }
4585    /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4586    *err = U_ZERO_ERROR;
4587    ucnv_cbToUWriteSub(toArgs,0,err);
4588}
4589
4590enum { kEmptySegmentToUCharsMax = 64 };
4591static void TestJitterbug6175(void) {
4592    static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4593    static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4594    static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4595    static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4596    static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4597    static const EmptySegmentTest emptySegmentTests[] = {
4598        /* converterName inputText    inputTextLength */
4599        { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4600        { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4601        { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4602        { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4603        { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
4604        /* terminator: */
4605        { NULL,          NULL,        0,                  }
4606    };
4607    const EmptySegmentTest * testPtr;
4608    for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4609        UErrorCode   err = U_ZERO_ERROR;
4610        UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4611        if (U_FAILURE(err)) {
4612            log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4613            return;
4614        }
4615        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4616        if (U_FAILURE(err)) {
4617            log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4618            ucnv_close(cnv);
4619            return;
4620        }
4621        {
4622            UChar         toUChars[kEmptySegmentToUCharsMax];
4623            UChar *       toUCharsPtr = toUChars;
4624            const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4625            const char *  inCharsPtr = testPtr->inputText;
4626            const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4627            ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4628        }
4629        ucnv_close(cnv);
4630    }
4631}
4632
4633static void
4634TestEBCDIC_STATEFUL() {
4635    /* test input */
4636    static const uint8_t in[]={
4637        0x61,
4638        0x1a,
4639        0x0f, 0x4b,
4640        0x42,
4641        0x40,
4642        0x36,
4643    };
4644
4645    /* expected test results */
4646    static const int32_t results[]={
4647        /* number of bytes read, code point */
4648        1, 0x002f,
4649        1, 0x0092,
4650        2, 0x002e,
4651        1, 0xff62,
4652        1, 0x0020,
4653        1, 0x0096,
4654
4655    };
4656    static const uint8_t in2[]={
4657        0x0f,
4658        0xa1,
4659        0x01
4660    };
4661
4662    /* expected test results */
4663    static const int32_t results2[]={
4664        /* number of bytes read, code point */
4665        2, 0x203E,
4666        1, 0x0001,
4667    };
4668
4669    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4670    UErrorCode errorCode=U_ZERO_ERROR;
4671    UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4672    if(U_FAILURE(errorCode)) {
4673        log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4674        return;
4675    }
4676    TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4677    ucnv_reset(cnv);
4678     /* Test the condition when source >= sourceLimit */
4679    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4680    ucnv_reset(cnv);
4681    /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4682    {
4683        static const uint8_t source1[]={0x0f};
4684        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4685    }
4686    /*Test for the condition where there is an invalid character*/
4687    ucnv_reset(cnv);
4688    {
4689        static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4690        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4691    }
4692    ucnv_reset(cnv);
4693    source=(const char*)in2;
4694    limit=(const char*)in2+sizeof(in2);
4695    TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4696    ucnv_close(cnv);
4697
4698}
4699
4700static void
4701TestGB18030() {
4702    /* test input */
4703    static const uint8_t in[]={
4704        0x24,
4705        0x7f,
4706        0x81, 0x30, 0x81, 0x30,
4707        0xa8, 0xbf,
4708        0xa2, 0xe3,
4709        0xd2, 0xbb,
4710        0x82, 0x35, 0x8f, 0x33,
4711        0x84, 0x31, 0xa4, 0x39,
4712        0x90, 0x30, 0x81, 0x30,
4713        0xe3, 0x32, 0x9a, 0x35
4714#if 0
4715        /*
4716         * Feature removed   markus 2000-oct-26
4717         * Only some codepages must match surrogate pairs into supplementary code points -
4718         * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4719         * GB 18030 provides direct encodings for supplementary code points, therefore
4720         * it must not combine two single-encoded surrogates into one code point.
4721         */
4722        0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4723#endif
4724    };
4725
4726    /* expected test results */
4727    static const int32_t results[]={
4728        /* number of bytes read, code point */
4729        1, 0x24,
4730        1, 0x7f,
4731        4, 0x80,
4732        2, 0x1f9,
4733        2, 0x20ac,
4734        2, 0x4e00,
4735        4, 0x9fa6,
4736        4, 0xffff,
4737        4, 0x10000,
4738        4, 0x10ffff
4739#if 0
4740        /* Feature removed. See comment above. */
4741        8, 0x10000
4742#endif
4743    };
4744
4745/*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4746    UErrorCode errorCode=U_ZERO_ERROR;
4747    UConverter *cnv=ucnv_open("gb18030", &errorCode);
4748    if(U_FAILURE(errorCode)) {
4749        log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4750        return;
4751    }
4752    TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4753    ucnv_close(cnv);
4754}
4755
4756static void
4757TestLMBCS() {
4758    /* LMBCS-1 string */
4759    static const uint8_t pszLMBCS[]={
4760        0x61,
4761        0x01, 0x29,
4762        0x81,
4763        0xA0,
4764        0x0F, 0x27,
4765        0x0F, 0x91,
4766        0x14, 0x0a, 0x74,
4767        0x14, 0xF6, 0x02,
4768        0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4769        0x10, 0x88, 0xA0,
4770    };
4771
4772    /* Unicode UChar32 equivalents */
4773    static const UChar32 pszUnicode32[]={
4774        /* code point */
4775        0x00000061,
4776        0x00002013,
4777        0x000000FC,
4778        0x000000E1,
4779        0x00000007,
4780        0x00000091,
4781        0x00000a74,
4782        0x00000200,
4783        0x00023456, /* code point for surrogate pair */
4784        0x00005516
4785    };
4786
4787/* Unicode UChar equivalents */
4788    static const UChar pszUnicode[]={
4789        /* code point */
4790        0x0061,
4791        0x2013,
4792        0x00FC,
4793        0x00E1,
4794        0x0007,
4795        0x0091,
4796        0x0a74,
4797        0x0200,
4798        0xD84D, /* low surrogate */
4799        0xDC56, /* high surrogate */
4800        0x5516
4801    };
4802
4803/* expected test results */
4804    static const int offsets32[]={
4805        /* number of bytes read, code point */
4806        0,
4807        1,
4808        3,
4809        4,
4810        5,
4811        7,
4812        9,
4813        12,
4814        15,
4815        21,
4816        24
4817    };
4818
4819/* expected test results */
4820    static const int offsets[]={
4821        /* number of bytes read, code point */
4822        0,
4823        1,
4824        3,
4825        4,
4826        5,
4827        7,
4828        9,
4829        12,
4830        15,
4831        18,
4832        21,
4833        24
4834    };
4835
4836
4837    UConverter *cnv;
4838
4839#define NAME_LMBCS_1 "LMBCS-1"
4840#define NAME_LMBCS_2 "LMBCS-2"
4841
4842
4843   /* Some basic open/close/property tests on some LMBCS converters */
4844    {
4845
4846      char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4847      char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4848      char get_subchars [1];
4849      const char * get_name;
4850      UConverter *cnv1;
4851      UConverter *cnv2;
4852
4853      int8_t len = sizeof(get_subchars);
4854
4855      UErrorCode errorCode=U_ZERO_ERROR;
4856
4857      /* Open */
4858      cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4859      if(U_FAILURE(errorCode)) {
4860         log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4861         return;
4862      }
4863      cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4864      if(U_FAILURE(errorCode)) {
4865         log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4866         return;
4867      }
4868
4869      /* Name */
4870      get_name = ucnv_getName (cnv1, &errorCode);
4871      if (strcmp(NAME_LMBCS_1,get_name)){
4872         log_err("Unexpected converter name: %s\n", get_name);
4873      }
4874      get_name = ucnv_getName (cnv2, &errorCode);
4875      if (strcmp(NAME_LMBCS_2,get_name)){
4876         log_err("Unexpected converter name: %s\n", get_name);
4877      }
4878
4879      /* substitution chars */
4880      ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4881      if(U_FAILURE(errorCode)) {
4882         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4883      }
4884      if (len!=1){
4885         log_err("Unexpected length of sub chars\n");
4886      }
4887      if (get_subchars[0] != expected_subchars[0]){
4888           log_err("Unexpected value of sub chars\n");
4889      }
4890      ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4891      if(U_FAILURE(errorCode)) {
4892         log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4893      }
4894      ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4895      if(U_FAILURE(errorCode)) {
4896         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4897      }
4898      if (len!=1){
4899         log_err("Unexpected length of sub chars\n");
4900      }
4901      if (get_subchars[0] != new_subchars[0]){
4902           log_err("Unexpected value of sub chars\n");
4903      }
4904      ucnv_close(cnv1);
4905      ucnv_close(cnv2);
4906
4907    }
4908
4909    /* LMBCS to Unicode - offsets */
4910    {
4911       UErrorCode errorCode=U_ZERO_ERROR;
4912
4913       const char * pSource = (const char *)pszLMBCS;
4914       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4915
4916       UChar Out [sizeof(pszUnicode) + 1];
4917       UChar * pOut = Out;
4918       UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4919
4920       int32_t off [sizeof(offsets)];
4921
4922      /* last 'offset' in expected results is just the final size.
4923         (Makes other tests easier). Compensate here: */
4924
4925       off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4926
4927
4928
4929      cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4930      if(U_FAILURE(errorCode)) {
4931           log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4932           return;
4933      }
4934
4935
4936
4937      ucnv_toUnicode (cnv,
4938                      &pOut,
4939                      OutLimit,
4940                      &pSource,
4941                      sourceLimit,
4942                      off,
4943                      TRUE,
4944                      &errorCode);
4945
4946
4947       if (memcmp(off,offsets,sizeof(offsets)))
4948       {
4949         log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4950       }
4951       if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4952       {
4953         log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4954       }
4955       ucnv_close(cnv);
4956    }
4957    {
4958   /* LMBCS to Unicode - getNextUChar */
4959      const char * sourceStart;
4960      const char *source=(const char *)pszLMBCS;
4961      const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4962      const UChar32 *results= pszUnicode32;
4963      const int *off = offsets32;
4964
4965      UErrorCode errorCode=U_ZERO_ERROR;
4966      UChar32 uniChar;
4967
4968      cnv=ucnv_open("LMBCS-1", &errorCode);
4969      if(U_FAILURE(errorCode)) {
4970           log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4971           return;
4972      }
4973      else
4974      {
4975
4976         while(source<limit) {
4977            sourceStart=source;
4978            uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4979            if(U_FAILURE(errorCode)) {
4980                  log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4981                  break;
4982            } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4983               log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4984                   uniChar, (source-sourceStart), *results, *off);
4985               break;
4986            }
4987            results++;
4988            off++;
4989         }
4990       }
4991       ucnv_close(cnv);
4992    }
4993    { /* test locale & optimization group operations: Unicode to LMBCS */
4994
4995      UErrorCode errorCode=U_ZERO_ERROR;
4996      UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4997      UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4998      UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4999      UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5000      const UChar * pUniOut = uniString;
5001      UChar * pUniIn = uniString;
5002      uint8_t lmbcsString [4];
5003      const char * pLMBCSOut = (const char *)lmbcsString;
5004      char * pLMBCSIn = (char *)lmbcsString;
5005
5006      /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5007      ucnv_fromUnicode (cnv16he,
5008                        &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
5009                        &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
5010                        NULL, 1, &errorCode);
5011
5012      if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5013      {
5014         log_err("LMBCS-16,locale=he gives unexpected translation\n");
5015      }
5016
5017      pLMBCSIn= (char *)lmbcsString;
5018      pUniOut = uniString;
5019      ucnv_fromUnicode (cnv01us,
5020                        &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
5021                        &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
5022                        NULL, 1, &errorCode);
5023
5024      if (lmbcsString[0] != 0x9F)
5025      {
5026         log_err("LMBCS-1,locale=US gives unexpected translation\n");
5027      }
5028
5029      /* single byte char from mbcs char set */
5030      lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
5031      pLMBCSOut = (const char *)lmbcsString;
5032      pUniIn = uniString;
5033      ucnv_toUnicode (cnv16jp,
5034                        &pUniIn, pUniIn + 1,
5035                        &pLMBCSOut, (pLMBCSOut + 1),
5036                        NULL, 1, &errorCode);
5037      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5038      {
5039           log_err("Unexpected results from LMBCS-16 single byte char\n");
5040      }
5041      /* convert to group 1: should be 3 bytes */
5042      pLMBCSIn = (char *)lmbcsString;
5043      pUniOut = uniString;
5044      ucnv_fromUnicode (cnv01us,
5045                        &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5046                        &pUniOut, pUniOut + 1,
5047                        NULL, 1, &errorCode);
5048      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5049         || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5050      {
5051           log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5052      }
5053      pLMBCSOut = (const char *)lmbcsString;
5054      pUniIn = uniString;
5055      ucnv_toUnicode (cnv01us,
5056                        &pUniIn, pUniIn + 1,
5057                        &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5058                        NULL, 1, &errorCode);
5059      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5060      {
5061           log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5062      }
5063      pLMBCSIn = (char *)lmbcsString;
5064      pUniOut = uniString;
5065      ucnv_fromUnicode (cnv16jp,
5066                        &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5067                        &pUniOut, pUniOut + 1,
5068                        NULL, 1, &errorCode);
5069      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5070      {
5071           log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5072      }
5073      ucnv_close(cnv16he);
5074      ucnv_close(cnv16jp);
5075      ucnv_close(cnv01us);
5076    }
5077    {
5078       /* Small source buffer testing, LMBCS -> Unicode */
5079
5080       UErrorCode errorCode=U_ZERO_ERROR;
5081
5082       const char * pSource = (const char *)pszLMBCS;
5083       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5084       int codepointCount = 0;
5085
5086       UChar Out [sizeof(pszUnicode) + 1];
5087       UChar * pOut = Out;
5088       UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
5089
5090
5091       cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5092       if(U_FAILURE(errorCode)) {
5093           log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5094           return;
5095       }
5096
5097
5098       while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5099       {
5100           ucnv_toUnicode (cnv,
5101               &pOut,
5102               OutLimit,
5103               &pSource,
5104               (pSource+1), /* claim that this is a 1- byte buffer */
5105               NULL,
5106               FALSE,    /* FALSE means there might be more chars in the next buffer */
5107               &errorCode);
5108
5109           if (U_SUCCESS (errorCode))
5110           {
5111               if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5112               {
5113                   /* we are on to the next code point: check value */
5114
5115                   if (Out[0] != pszUnicode[codepointCount]){
5116                       log_err("LMBCS->Uni result %lx should have been %lx \n",
5117                           Out[0], pszUnicode[codepointCount]);
5118                   }
5119
5120                   pOut = Out; /* reset for accumulating next code point */
5121                   codepointCount++;
5122               }
5123           }
5124           else
5125           {
5126               log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5127           }
5128       }
5129       {
5130         /* limits & surrogate error testing */
5131         char LIn [sizeof(pszLMBCS)];
5132         const char * pLIn = LIn;
5133
5134         char LOut [sizeof(pszLMBCS)];
5135         char * pLOut = LOut;
5136
5137         UChar UOut [sizeof(pszUnicode)];
5138         UChar * pUOut = UOut;
5139
5140         UChar UIn [sizeof(pszUnicode)];
5141         const UChar * pUIn = UIn;
5142
5143         int32_t off [sizeof(offsets)];
5144         UChar32 uniChar;
5145
5146         errorCode=U_ZERO_ERROR;
5147
5148         /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5149         pUIn++;
5150         ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5151         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5152         {
5153            log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5154         }
5155         pUIn--;
5156
5157         errorCode=U_ZERO_ERROR;
5158         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5159         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5160         {
5161            log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5162         }
5163         errorCode=U_ZERO_ERROR;
5164
5165         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5166         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5167         {
5168            log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5169         }
5170         errorCode=U_ZERO_ERROR;
5171
5172         /* 0 byte source request - no error, no pointer movement */
5173         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5174         ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5175         if(U_FAILURE(errorCode)) {
5176            log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5177         }
5178         if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5179         {
5180              log_err("Unexpected pointer move in 0 byte source request \n");
5181         }
5182         /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5183         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5184         if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5185         {
5186            log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5187         }
5188         if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5189         {
5190            log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5191         }
5192         errorCode = U_ZERO_ERROR;
5193
5194         /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5195
5196         pUIn = pszUnicode;
5197         ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
5198         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5199         {
5200            log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5201         }
5202
5203         errorCode = U_ZERO_ERROR;
5204
5205         pLIn = (const char *)pszLMBCS;
5206         ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5207         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5208         {
5209            log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5210         }
5211
5212         /* unpaired or chopped LMBCS surrogates */
5213
5214         /* OK high surrogate, Low surrogate is chopped */
5215         LIn [0] = (char)0x14;
5216         LIn [1] = (char)0xD8;
5217         LIn [2] = (char)0x01;
5218         LIn [3] = (char)0x14;
5219         LIn [4] = (char)0xDC;
5220         pLIn = LIn;
5221         errorCode = U_ZERO_ERROR;
5222         pUOut = UOut;
5223
5224         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5225         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5226         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5227         {
5228            log_err("Unexpected results on chopped low surrogate\n");
5229         }
5230
5231         /* chopped at surrogate boundary */
5232         LIn [0] = (char)0x14;
5233         LIn [1] = (char)0xD8;
5234         LIn [2] = (char)0x01;
5235         pLIn = LIn;
5236         errorCode = U_ZERO_ERROR;
5237         pUOut = UOut;
5238
5239         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5240         if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5241         {
5242            log_err("Unexpected results on chopped at surrogate boundary \n");
5243         }
5244
5245         /* unpaired surrogate plus valid Unichar */
5246         LIn [0] = (char)0x14;
5247         LIn [1] = (char)0xD8;
5248         LIn [2] = (char)0x01;
5249         LIn [3] = (char)0x14;
5250         LIn [4] = (char)0xC9;
5251         LIn [5] = (char)0xD0;
5252         pLIn = LIn;
5253         errorCode = U_ZERO_ERROR;
5254         pUOut = UOut;
5255
5256         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5257         if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5258         {
5259            log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5260         }
5261
5262      /* unpaired surrogate plus chopped Unichar */
5263         LIn [0] = (char)0x14;
5264         LIn [1] = (char)0xD8;
5265         LIn [2] = (char)0x01;
5266         LIn [3] = (char)0x14;
5267         LIn [4] = (char)0xC9;
5268
5269         pLIn = LIn;
5270         errorCode = U_ZERO_ERROR;
5271         pUOut = UOut;
5272
5273         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5274         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5275         {
5276            log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5277         }
5278
5279         /* unpaired surrogate plus valid non-Unichar */
5280         LIn [0] = (char)0x14;
5281         LIn [1] = (char)0xD8;
5282         LIn [2] = (char)0x01;
5283         LIn [3] = (char)0x0F;
5284         LIn [4] = (char)0x3B;
5285
5286         pLIn = LIn;
5287         errorCode = U_ZERO_ERROR;
5288         pUOut = UOut;
5289
5290         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5291         if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5292         {
5293            log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5294         }
5295
5296         /* unpaired surrogate plus chopped non-Unichar */
5297         LIn [0] = (char)0x14;
5298         LIn [1] = (char)0xD8;
5299         LIn [2] = (char)0x01;
5300         LIn [3] = (char)0x0F;
5301
5302         pLIn = LIn;
5303         errorCode = U_ZERO_ERROR;
5304         pUOut = UOut;
5305
5306         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5307
5308         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5309         {
5310            log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5311         }
5312       }
5313    }
5314   ucnv_close(cnv);  /* final cleanup */
5315}
5316
5317
5318static void TestJitterbug255()
5319{
5320    static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5321    const char *testBuffer = (const char *)testBytes;
5322    const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5323    UErrorCode status = U_ZERO_ERROR;
5324    /*UChar32 result;*/
5325    UConverter *cnv = 0;
5326
5327    cnv = ucnv_open("shift-jis", &status);
5328    if (U_FAILURE(status) || cnv == 0) {
5329        log_data_err("Failed to open the converter for SJIS.\n");
5330                return;
5331    }
5332    while (testBuffer != testEnd)
5333    {
5334        /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5335        if (U_FAILURE(status))
5336        {
5337            log_err("Failed to convert the next UChar for SJIS.\n");
5338            break;
5339        }
5340    }
5341    ucnv_close(cnv);
5342}
5343
5344static void TestEBCDICUS4XML()
5345{
5346    UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5347    static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5348    static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5349    static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5350    char target_x[] = {0x00, 0x00, 0x00, 0x00};
5351    UChar *unicodes = unicodes_x;
5352    const UChar *toUnicodeMaps = toUnicodeMaps_x;
5353    char *target = target_x;
5354    const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5355    UErrorCode status = U_ZERO_ERROR;
5356    UConverter *cnv = 0;
5357
5358    cnv = ucnv_open("ebcdic-xml-us", &status);
5359    if (U_FAILURE(status) || cnv == 0) {
5360        log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5361        return;
5362    }
5363    ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5364    if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5365        log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5366            u_errorName(status));
5367        printUSeqErr(unicodes_x, 3);
5368        printUSeqErr(toUnicodeMaps, 3);
5369    }
5370    status = U_ZERO_ERROR;
5371    ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5372    if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5373        log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5374            u_errorName(status));
5375        printSeqErr((const unsigned char*)target_x, 3);
5376        printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5377    }
5378    ucnv_close(cnv);
5379}
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5381
5382#if !UCONFIG_NO_COLLATION
5383
5384static void TestJitterbug981(){
5385    const UChar* rules;
5386    int32_t rules_length, target_cap, bytes_needed, buff_size;
5387    UErrorCode status = U_ZERO_ERROR;
5388    UConverter *utf8cnv;
5389    UCollator* myCollator;
5390    char *buff;
5391    int numNeeded=0;
5392    utf8cnv = ucnv_open ("utf8", &status);
5393    if(U_FAILURE(status)){
5394        log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5395        return;
5396    }
5397    myCollator = ucol_open("zh", &status);
5398    if(U_FAILURE(status)){
5399        log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5400        ucnv_close(utf8cnv);
5401        return;
5402    }
5403
5404    rules = ucol_getRules(myCollator, &rules_length);
5405    if(rules_length == 0) {
5406        log_data_err("missing zh tailoring rule string\n");
5407        ucol_close(myCollator);
5408        ucnv_close(utf8cnv);
5409        return;
5410    }
5411    buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5412    buff = malloc(buff_size);
5413
5414    target_cap = 0;
5415    do {
5416        ucnv_reset(utf8cnv);
5417        status = U_ZERO_ERROR;
5418        if(target_cap >= buff_size) {
5419            log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5420            break;
5421        }
5422        bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5423            rules, rules_length, &status);
5424        target_cap = (bytes_needed > target_cap) ? bytes_needed :