164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others.
264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
6f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius*   Copyright (C) 2003-2014, International Business Machines
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   file name:  convtest.cpp
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   encoding:   US-ASCII
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   tab size:   8 (not used)
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   indentation:4
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created on: 2003jul15
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created by: Markus W. Scherer
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Test file for data-driven conversion tests.
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Note: Turning off all of convtest.cpp when UCONFIG_NO_LEGACY_CONVERSION
 * is set goes slightly further than necessary - it also removes tests for
 * Unicode charsets like UTF-8 that should work.
 * However, there is no easy way for the test to detect whether a test case
 * is for a Unicode charset, so it would be difficult to exclude only the
 * test cases for legacy charsets.
 * Also, regular testing of ICU is done with all modules on, therefore
 * not testing conversion for a custom configuration like this should be ok.
 */
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h"
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h"
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/parsepos.h"
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uniset.h"
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ustring.h"
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ures.h"
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "convtest.h"
41f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "cmemory.h"
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/tstdtmod.h"
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <string.h>
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdlib.h>
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// Characters used in the test data's "callback" field; the first character
// of that field selects the converter callback for a test case.
enum {
    SUB_CB  = '?',  // selects the SUBSTITUTE callback
    SKIP_CB = '0',  // selects the SKIP callback
    STOP_CB = '.',  // selects the STOP callback
    ESC_CB  = '&'   // selects the ESCAPE callback
};
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruConversionTest::ConversionTest() {
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf8Cnv=ucnv_open("UTF-8", &errorCode);
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        errln("unable to open UTF-8 converter");
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruConversionTest::~ConversionTest() {
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_close(utf8Cnv);
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (exec) logln("TestSuite ConversionTest: ");
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    switch (index) {
7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_FILE_IO
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
75f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        case 3: name="TestDefaultIgnorableCallback"; if (exec) TestDefaultIgnorableCallback(); break;
7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else
7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case 0:
7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case 1:
79f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        case 2:
80f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        case 3: name="skip"; break;
8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
82f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        case 4: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        default: name=""; break; //needed to end loop
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// test data interface ----------------------------------------------------- ***
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruConversionTest::TestToUnicode() {
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ConversionCase cc;
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char charset[100], cbopt[4];
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char *option;
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString s, unicode;
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t offsetsLength;
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverterToUCallback callback;
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TestDataModule *dataModule;
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TestData *testData;
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const DataMap *testCase;
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode errorCode;
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i;
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        testData=dataModule->createTestData("toUnicode", errorCode);
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_SUCCESS(errorCode)) {
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            for(i=0; testData->nextCase(testCase, errorCode); ++i) {
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(U_FAILURE(errorCode)) {
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln("error retrieving conversion/toUnicode test case %d - %s",
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            i, u_errorName(errorCode));
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errorCode=U_ZERO_ERROR;
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    continue;
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.caseNr=i;
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                s=testCase->getString("charset", errorCode);
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.charset=charset;
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
12372614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                // BEGIN android-added
12472614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                // To save space, Android does not build full ISO-2022-CN tables.
12572614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                // We skip the TestGetKeywordValuesForLocale for counting available collations.
12672614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                if (strlen(charset) >= 8 &&
12772614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                    strncmp(charset+4, "2022-CN", 4) == 0) {
12872614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                    continue;
12972614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                }
13072614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                // END android-added
13172614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                unicode=testCase->getString("unicode", errorCode);
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.unicode=unicode.getBuffer();
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.unicodeLength=unicode.length();
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsetsLength=0;
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(offsetsLength==0) {
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.offsets=NULL;
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(offsetsLength!=unicode.length()) {
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length",
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            i, unicode.length(), offsetsLength);
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errorCode=U_ILLEGAL_ARGUMENT_ERROR;
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                s=testCase->getString("errorCode", errorCode);
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(s==UNICODE_STRING("invalid", 7)) {
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.outErrorCode=U_INVALID_CHAR_FOUND;
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(s==UNICODE_STRING("illegal", 7)) {
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(s==UNICODE_STRING("truncated", 9)) {
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(s==UNICODE_STRING("illesc", 6)) {
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(s==UNICODE_STRING("unsuppesc", 9)) {
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE;
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.outErrorCode=U_ZERO_ERROR;
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                s=testCase->getString("callback", errorCode);
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.cbopt=cbopt;
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                switch(cbopt[0]) {
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                case SUB_CB:
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    callback=UCNV_TO_U_CALLBACK_SUBSTITUTE;
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                case SKIP_CB:
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    callback=UCNV_TO_U_CALLBACK_SKIP;
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                case STOP_CB:
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    callback=UCNV_TO_U_CALLBACK_STOP;
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                case ESC_CB:
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    callback=UCNV_TO_U_CALLBACK_ESCAPE;
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                default:
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    callback=NULL;
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                option=callback==NULL ? cbopt : cbopt+1;
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(*option==0) {
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    option=NULL;
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidChars", errorCode);
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(U_FAILURE(errorCode)) {
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln("error parsing conversion/toUnicode test case %d - %s",
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            i, u_errorName(errorCode));
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errorCode=U_ZERO_ERROR;
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    logln("TestToUnicode[%d] %s", i, charset);
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ToUnicodeCase(cc, callback, option);
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            delete testData;
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        delete dataModule;
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else {
20685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        dataerrln("Could not load test conversion data");
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruConversionTest::TestFromUnicode() {
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ConversionCase cc;
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char charset[100], cbopt[4];
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char *option;
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString s, unicode, invalidUChars;
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t offsetsLength, index;
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverterFromUCallback callback;
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TestDataModule *dataModule;
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TestData *testData;
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const DataMap *testCase;
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *p;
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode errorCode;
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i, length;
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        testData=dataModule->createTestData("fromUnicode", errorCode);
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_SUCCESS(errorCode)) {
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            for(i=0; testData->nextCase(testCase, errorCode); ++i) {
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(U_FAILURE(errorCode)) {
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln("error retrieving conversion/fromUnicode test case %d - %s",
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            i, u_errorName(errorCode));
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errorCode=U_ZERO_ERROR;
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    continue;
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.caseNr=i;
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                s=testCase->getString("charset", errorCode);
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.charset=charset;
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
24572614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                // BEGIN android-added
24672614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                // To save space, Android does not build full ISO-2022-CN tables.
24772614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                // We skip the TestGetKeywordValuesForLocale for counting available collations.
24872614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                if (strlen(charset) >= 8 &&
24972614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                    strncmp(charset+4, "2022-CN", 4) == 0) {
25072614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                    continue;
25172614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                }
25272614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                // END android-added
25372614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                unicode=testCase->getString("unicode", errorCode);
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.unicode=unicode.getBuffer();
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.unicodeLength=unicode.length();
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsetsLength=0;
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(offsetsLength==0) {
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.offsets=NULL;
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(offsetsLength!=cc.bytesLength) {
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length",
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            i, cc.bytesLength, offsetsLength);
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errorCode=U_ILLEGAL_ARGUMENT_ERROR;
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                s=testCase->getString("errorCode", errorCode);
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(s==UNICODE_STRING("invalid", 7)) {
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.outErrorCode=U_INVALID_CHAR_FOUND;
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(s==UNICODE_STRING("illegal", 7)) {
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(s==UNICODE_STRING("truncated", 9)) {
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.outErrorCode=U_ZERO_ERROR;
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                s=testCase->getString("callback", errorCode);
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.setSub=0; // default: no subchar
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if((index=s.indexOf((UChar)0))>0) {
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // read NUL-separated subchar first, if any
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // copy the subchar from Latin-1 characters
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // start after the NUL
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    p=s.getTerminatedBuffer();
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=index+1;
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    p+=length;
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=s.length()-length;
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(length<=0 || length>=(int32_t)sizeof(cc.subchar)) {
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        int32_t j;
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        for(j=0; j<length; ++j) {
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            cc.subchar[j]=(char)p[j];
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        }
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        // NUL-terminate the subchar
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cc.subchar[j]=0;
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cc.setSub=1;
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // remove the NUL and subchar from s
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    s.truncate(index);
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ {
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // read a substitution string, separated by an equal sign
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    p=s.getBuffer()+index+1;
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    length=s.length()-(index+1);
313f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                    if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) {
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        u_memcpy(cc.subString, p, length);
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        // NUL-terminate the subString
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cc.subString[length]=0;
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cc.setSub=-1;
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // remove the equal sign and subString from s
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    s.truncate(index);
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.cbopt=cbopt;
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                switch(cbopt[0]) {
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                case SUB_CB:
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE;
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                case SKIP_CB:
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    callback=UCNV_FROM_U_CALLBACK_SKIP;
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                case STOP_CB:
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    callback=UCNV_FROM_U_CALLBACK_STOP;
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                case ESC_CB:
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    callback=UCNV_FROM_U_CALLBACK_ESCAPE;
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                default:
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    callback=NULL;
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                option=callback==NULL ? cbopt : cbopt+1;
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(*option==0) {
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    option=NULL;
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                invalidUChars=testCase->getString("invalidUChars", errorCode);
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.invalidUChars=invalidUChars.getBuffer();
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.invalidLength=invalidUChars.length();
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(U_FAILURE(errorCode)) {
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln("error parsing conversion/fromUnicode test case %d - %s",
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            i, u_errorName(errorCode));
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errorCode=U_ZERO_ERROR;
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    logln("TestFromUnicode[%d] %s", i, charset);
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    FromUnicodeCase(cc, callback, option);
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            delete testData;
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        delete dataModule;
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else {
36885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        dataerrln("Could not load test conversion data");
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e };
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruConversionTest::TestGetUnicodeSet() {
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char charset[100];
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString s, map, mapnot;
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t which;
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ParsePosition pos;
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeSet cnvSet, mapSet, mapnotSet, diffSet;
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeSet *cnvSetPtr = &cnvSet;
38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalUConverterPointer cnv;
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TestDataModule *dataModule;
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TestData *testData;
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const DataMap *testCase;
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode errorCode;
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i;
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        testData=dataModule->createTestData("getUnicodeSet", errorCode);
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_SUCCESS(errorCode)) {
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            for(i=0; testData->nextCase(testCase, errorCode); ++i) {
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(U_FAILURE(errorCode)) {
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln("error retrieving conversion/getUnicodeSet test case %d - %s",
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            i, u_errorName(errorCode));
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errorCode=U_ZERO_ERROR;
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    continue;
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                s=testCase->getString("charset", errorCode);
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
40772614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                // BEGIN android-added
40872614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                // To save space, Android does not build full ISO-2022-CN tables.
40972614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                // We skip the TestGetKeywordValuesForLocale for counting available collations.
41072614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                if (strlen(charset) >= 8 &&
41172614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                    strncmp(charset+4, "2022-CN", 4) == 0) {
41272614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                    continue;
41372614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                }
41472614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert                // END android-added
41572614b291cfc75c5d0881dc8fe23b1cdbf278072Fredrik Roubert
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                map=testCase->getString("map", errorCode);
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                mapnot=testCase->getString("mapnot", errorCode);
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                which=testCase->getInt28("which", errorCode);
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(U_FAILURE(errorCode)) {
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln("error parsing conversion/getUnicodeSet test case %d - %s",
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            i, u_errorName(errorCode));
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errorCode=U_ZERO_ERROR;
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    continue;
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // test this test case
429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                mapSet.clear();
430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                mapnotSet.clear();
431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                pos.setIndex(0);
433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                mapSet.applyPattern(map, pos, 0, NULL, errorCode);
434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(U_FAILURE(errorCode) || pos.getIndex()!=map.length()) {
435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n"
436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          "    error index %d  index %d  U+%04x",
437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), map.char32At(pos.getIndex()));
438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errorCode=U_ZERO_ERROR;
439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    continue;
440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                pos.setIndex(0);
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode);
444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(U_FAILURE(errorCode) || pos.getIndex()!=mapnot.length()) {
445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n"
446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          "    error index %d  index %d  U+%04x",
447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), mapnot.char32At(pos.getIndex()));
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errorCode=U_ZERO_ERROR;
449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    continue;
450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                logln("TestGetUnicodeSet[%d] %s", i, charset);
453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                cnv.adoptInstead(cnv_open(charset, errorCode));
455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(U_FAILURE(errorCode)) {
45685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    errcheckln(errorCode, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            charset, i, u_errorName(errorCode));
458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errorCode=U_ZERO_ERROR;
459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    continue;
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
46250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConverterUnicodeSet)which, &errorCode);
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(U_FAILURE(errorCode)) {
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            charset, i, u_errorName(errorCode));
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errorCode=U_ZERO_ERROR;
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    continue;
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // are there items that must be in cnvSet but are not?
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (diffSet=mapSet).removeAll(cnvSet);
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(!diffSet.isEmpty()) {
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diffSet.toPattern(s, TRUE);
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(s.length()>100) {
476f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                        s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            charset, i);
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln(s);
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // are there items that must not be in cnvSet but are?
484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (diffSet=mapnotSet).retainAll(cnvSet);
485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(!diffSet.isEmpty()) {
486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    diffSet.toPattern(s, TRUE);
487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(s.length()>100) {
488f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                        s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            charset, i);
492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln(s);
493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            delete testData;
496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        delete dataModule;
498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else {
50085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        dataerrln("Could not load test conversion data");
501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
50485bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CDECL_BEGIN
50585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void U_CALLCONV
506c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste QuerugetUnicodeSetCallback(const void *context,
50785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                      UConverterFromUnicodeArgs * /*fromUArgs*/,
50885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                      const UChar* /*codeUnits*/,
50985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                      int32_t /*length*/,
510c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                      UChar32 codePoint,
511c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                      UConverterCallbackReason reason,
512c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                      UErrorCode *pErrorCode) {
513c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    if(reason<=UCNV_IRREGULAR) {
514c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        ((UnicodeSet *)context)->remove(codePoint);  // the converter cannot convert this code point
515c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        *pErrorCode=U_ZERO_ERROR;                    // skip
516c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    }  // else ignore the reset, close and clone calls.
517c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru}
51885bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CDECL_END
519c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru
520c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru// Compare ucnv_getUnicodeSet() with the set of characters that can be converted.
521c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queruvoid
522c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste QueruConversionTest::TestGetUnicodeSet2() {
523c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    // Build a string with all code points.
524c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    UChar32 cpLimit;
525c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    int32_t s0Length;
526c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    if(quick) {
527c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        cpLimit=s0Length=0x10000;  // BMP only
528c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    } else {
529c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        cpLimit=0x110000;
530c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        s0Length=0x10000+0x200000;  // BMP + surrogate pairs
531c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    }
532c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    UChar *s0=new UChar[s0Length];
533c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    if(s0==NULL) {
534c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        return;
535c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    }
536c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    UChar *s=s0;
537c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    UChar32 c;
538c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    UChar c2;
539c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    // low BMP
540c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    for(c=0; c<=0xd7ff; ++c) {
541c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        *s++=(UChar)c;
542c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    }
543c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    // trail surrogates
544c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    for(c=0xdc00; c<=0xdfff; ++c) {
545c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        *s++=(UChar)c;
546c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    }
547c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    // lead surrogates
548c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    // (after trails so that there is not even one surrogate pair in between)
549c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    for(c=0xd800; c<=0xdbff; ++c) {
550c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        *s++=(UChar)c;
551c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    }
552c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    // high BMP
553c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    for(c=0xe000; c<=0xffff; ++c) {
554c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        *s++=(UChar)c;
555c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    }
556c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    // supplementary code points = surrogate pairs
557c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    if(cpLimit==0x110000) {
558c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        for(c=0xd800; c<=0xdbff; ++c) {
559c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            for(c2=0xdc00; c2<=0xdfff; ++c2) {
560c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                *s++=(UChar)c;
561c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                *s++=c2;
562c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            }
563c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        }
564c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    }
565c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru
566c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    static const char *const cnvNames[]={
567c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "UTF-8",
568c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "UTF-7",
569c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "UTF-16",
570c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "US-ASCII",
571c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "ISO-8859-1",
572c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "windows-1252",
573c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "Shift-JIS",
574c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "ibm-1390",  // EBCDIC_STATEFUL table
575c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "ibm-16684",  // DBCS-only extension table based on EBCDIC_STATEFUL table
576c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "HZ",
577c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "ISO-2022-JP",
578c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "JIS7",
579c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "ISO-2022-CN",
580c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "ISO-2022-CN-EXT",
581c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        "LMBCS"
582c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    };
58350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalUConverterPointer cnv;
584c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    char buffer[1024];
585c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    int32_t i;
586f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) {
587c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        UErrorCode errorCode=U_ZERO_ERROR;
58850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));
589c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        if(U_FAILURE(errorCode)) {
59085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));
591c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            continue;
592c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        }
593c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        UnicodeSet expected;
59450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);
595c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        if(U_FAILURE(errorCode)) {
596c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode));
597c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            continue;
598c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        }
599c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        UConverterUnicodeSet which;
600c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {
601c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
60250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ucnv_setFallback(cnv.getAlias(), TRUE);
603c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            }
604c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            expected.add(0, cpLimit-1);
605c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            s=s0;
606c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            UBool flush;
607c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            do {
608c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                char *t=buffer;
609c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                flush=(UBool)(s==s0+s0Length);
61050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);
611c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                if(U_FAILURE(errorCode)) {
612c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
613c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                        errorCode=U_ZERO_ERROR;
614c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                        continue;
615c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                    } else {
616c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                        break;  // unexpected error, should not occur
617c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                    }
618c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                }
619c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            } while(!flush);
620c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            UnicodeSet set;
62150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode);
622c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            if(cpLimit<0x110000) {
623c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                set.remove(cpLimit, 0x10ffff);
624c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            }
625c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            if(which==UCNV_ROUNDTRIP_SET) {
626c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                // ignore PUA code points because they will be converted even if they
627c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                // are fallbacks and when other fallbacks are turned off,
628c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips
629c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                expected.remove(0xe000, 0xf8ff);
630c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                expected.remove(0xf0000, 0xffffd);
631c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                expected.remove(0x100000, 0x10fffd);
632c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                set.remove(0xe000, 0xf8ff);
633c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                set.remove(0xf0000, 0xffffd);
634c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                set.remove(0x100000, 0x10fffd);
635c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            }
636c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            if(set!=expected) {
637c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                // First try to see if we have different sets because ucnv_getUnicodeSet()
638c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                // added strings: The above conversion method does not tell us what strings might be convertible.
639c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                // Remove strings from the set and compare again.
640c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                // Unfortunately, there are no good, direct set methods for finding out whether there are strings
641c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                // in the set, nor for enumerating or removing just them.
642c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                // Intersect all code points with the set. The intersection will not contain strings.
643c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                UnicodeSet temp(0, 0x10ffff);
644c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                temp.retainAll(set);
645c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                set=temp;
646c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            }
647c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            if(set!=expected) {
648c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                UnicodeSet diffSet;
649c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                UnicodeString out;
650c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru
651c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                // are there items that must be in the set but are not?
652c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                (diffSet=expected).removeAll(set);
653c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                if(!diffSet.isEmpty()) {
654c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                    diffSet.toPattern(out, TRUE);
655c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                    if(out.length()>100) {
656f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                        out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
657c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                    }
658c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                    errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
659c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            cnvNames[i], which);
660c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                    errln(out);
661c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                }
662c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru
663c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                // are there items that must not be in the set but are?
664c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                (diffSet=set).removeAll(expected);
665c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                if(!diffSet.isEmpty()) {
666c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                    diffSet.toPattern(out, TRUE);
667c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                    if(out.length()>100) {
668f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                        out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
669c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                    }
670c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                    errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
671c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            cnvNames[i], which);
672c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                    errln(out);
673c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                }
674c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru            }
675c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        }
676c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    }
677c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru
678c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    delete [] s0;
679c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru}
680c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru
681f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// Test all codepoints which has the default ignorable Unicode property are ignored if they have no mapping
682f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) in ucnv_err.c should be updated
683f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusvoid
684f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusConversionTest::TestDefaultIgnorableCallback() {
685f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    UErrorCode status = U_ZERO_ERROR;
686f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    const char *cnv_name = "euc-jp-2007";
687f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";
688f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";
689f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
690f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    UnicodeSet *set_ignorable = new UnicodeSet(pattern_ignorable, status);
691f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if (U_FAILURE(status)) {
692f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status));
693f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        return;
694f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
695f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
696f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    UnicodeSet *set_not_ignorable = new UnicodeSet(pattern_not_ignorable, status);
697f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if (U_FAILURE(status)) {
698f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status));
699f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        return;
700f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
701f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
702f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    UConverter *cnv = cnv_open(cnv_name, status);
703f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if (U_FAILURE(status)) {
704f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status));
705f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        return;
706f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
707f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
708f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    // set callback for the converter
709f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
710f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
711f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    UChar32 input[1];
712f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    char output[10];
713f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    int32_t outputLength;
714f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
715f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    // test default ignorables are ignored
716f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    int size = set_ignorable->size();
717f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    for (int i = 0; i < size; i++) {
718f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        status = U_ZERO_ERROR;
719f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        outputLength= 0;
720f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
721f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        input[0] = set_ignorable->charAt(i);
722f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
723f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
724f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        if (U_FAILURE(status) || outputLength != 0) {
725f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status));
726f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        }
727f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
728f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
729f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    // test non-ignorables are not ignored
730f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    size = set_not_ignorable->size();
731f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    for (int i = 0; i < size; i++) {
732f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        status = U_ZERO_ERROR;
733f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        outputLength= 0;
734f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
735f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        input[0] = set_not_ignorable->charAt(i);
736f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
737f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        if (input[0] == 0) {
738f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            continue;
739f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        }
740f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
741f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
742f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        if (U_FAILURE(status) || outputLength <= 0) {
743f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status));
744f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        }
745f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
746f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
747f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    ucnv_close(cnv);
748f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    delete set_not_ignorable;
749f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    delete set_ignorable;
750f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius}
751f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// open testdata or ICU data converter ------------------------------------- ***
753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter *
755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {
7568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if(name!=NULL && *name=='+') {
7578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        // Converter names that start with '+' are ignored in ICU4J tests.
7588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ++name;
7598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(name!=NULL && *name=='*') {
761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* loadTestData(): set the data directory */
762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);
763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ucnv_open(name, &errorCode);
765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// output helpers ---------------------------------------------------------- ***
769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// Map a nibble value (0..15) to its lowercase hexadecimal character.
static inline char
hexDigit(uint8_t digit) {
    if(digit<=9) {
        return (char)('0'+digit);
    }
    return (char)('a'+(digit-10));
}
774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic char *
776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruprintBytes(const uint8_t *bytes, int32_t length, char *out) {
777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t b;
778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length>0) {
780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        b=*bytes++;
781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        --length;
782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *out++=hexDigit((uint8_t)(b>>4));
783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *out++=hexDigit((uint8_t)(b&0xf));
784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(length>0) {
787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        b=*bytes++;
788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        --length;
789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *out++=' ';
790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *out++=hexDigit((uint8_t)(b>>4));
791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *out++=hexDigit((uint8_t)(b&0xf));
792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *out++=0;
794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return out;
795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic char *
798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruprintUnicode(const UChar *unicode, int32_t length, char *out) {
799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c;
800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i;
801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=0; i<length;) {
803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(i>0) {
804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *out++=' ';
805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        U16_NEXT(unicode, i, length, c);
807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // write 4..6 digits
808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c>=0x100000) {
809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *out++='1';
810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c>=0x10000) {
812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *out++=hexDigit((uint8_t)((c>>16)&0xf));
813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *out++=hexDigit((uint8_t)((c>>12)&0xf));
815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *out++=hexDigit((uint8_t)((c>>8)&0xf));
816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *out++=hexDigit((uint8_t)((c>>4)&0xf));
817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *out++=hexDigit((uint8_t)(c&0xf));
818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *out++=0;
820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return out;
821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// Write `length` offsets as exactly two characters each, separated by single
// spaces, followed by a terminating NUL:
//   below -9 -> "-x",  -9..-1 -> "-N",  0..9 -> " N",  10..99 -> "NN",  above 99 -> "xx"
// A NULL offsets array is treated as empty.
// Returns the position just past the NUL that was written.
// The caller must supply a buffer that is large enough; no bounds are checked.
static char *
printOffsets(const int32_t *offsets, int32_t length, char *out) {
    const int32_t count= offsets!=NULL ? length : 0;

    for(int32_t idx=0; idx<count; ++idx) {
        // separator between offsets, but not before the first one
        if(idx!=0) {
            *out++=' ';
        }

        const int32_t value=offsets[idx];
        char first, second;

        if(value>99) {
            first='x';
            second='x';
        } else if(value>=10) {
            first=(char)('0'+value/10);
            second=(char)('0'+value%10);
        } else if(value>=0) {
            first=' ';
            second=(char)('0'+value);
        } else if(value>=-9) {
            first='-';
            second=(char)('0'-value);
        } else /* value<-9 */ {
            first='-';
            second='x';
        }

        *out++=first;
        *out++=second;
    }

    *out++=0;
    return out;
}
855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// toUnicode test worker functions ----------------------------------------- ***
857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t
859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustepToUnicode(ConversionCase &cc, UConverter *cnv,
860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              UChar *result, int32_t resultCapacity,
861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              int32_t *resultOffsets, /* also resultCapacity */
862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              int32_t step,
863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              UErrorCode *pErrorCode) {
864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char *source, *sourceLimit, *bytesLimit;
865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar *target, *targetLimit, *resultLimit;
866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool flush;
867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=(const char *)cc.bytes;
869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=result;
870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    bytesLimit=source+cc.bytesLength;
871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    resultLimit=result+resultCapacity;
872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(step>=0) {
874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time
875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // move only one buffer (in vs. out) at a time to be extra mean
876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // step==0 performs bulk conversion and generates offsets
877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // initialize the partial limits for the loop
879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(step==0) {
880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // use the entire buffers
881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            sourceLimit=bytesLimit;
882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            targetLimit=resultLimit;
883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            flush=cc.finalFlush;
884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // start with empty partial buffers
886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            sourceLimit=source;
887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            targetLimit=target;
888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            flush=FALSE;
889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // output offsets only for bulk conversion
891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            resultOffsets=NULL;
892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(;;) {
895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // resetting the opposite conversion direction must not affect this one
896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_resetFromUnicode(cnv);
897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // convert
899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_toUnicode(cnv,
900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                &target, targetLimit,
901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                &source, sourceLimit,
902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                resultOffsets,
903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                flush, pErrorCode);
904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // check pointers and errors
906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(source>sourceLimit || target>targetLimit) {
907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(target!=targetLimit) {
911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // buffer overflow must only be set when the target is filled
912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(targetLimit==resultLimit) {
915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // not just a partial overflow
916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // the partial target is filled, set a new limit, reset the error and continue
920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_ZERO_ERROR;
922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(U_FAILURE(*pErrorCode)) {
923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // some other error occurred, done
924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(source!=sourceLimit) {
927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // when no error occurs, then the input must be consumed
928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(sourceLimit==bytesLimit) {
933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // we are done
934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // the partial conversion succeeded, set a new limit and continue
938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit;
939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit);
940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else /* step<0 */ {
943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * step==-1: call only ucnv_getNextUChar()
945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *   if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *   else give it at most (-step-2)/2 bytes
948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar32 c;
950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // end the loop by getting an index out of bounds error
952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(;;) {
953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // resetting the opposite conversion direction must not affect this one
954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_resetFromUnicode(cnv);
955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // convert
957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if((step&1)!=0 /* odd: -1, -3, -5, ... */) {
958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                sourceLimit=source; // use sourceLimit not as a real limit
959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                    // but to remember the pre-getNextUChar source pointer
960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode);
961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // check pointers and errors
963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(source!=bytesLimit) {
965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *pErrorCode=U_ZERO_ERROR;
968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(U_FAILURE(*pErrorCode)) {
971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // source may not move if c is from previous overflow
974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(target==resultLimit) {
976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(c<=0xffff) {
980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=(UChar)c;
981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=U16_LEAD(c);
983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(target==resultLimit) {
984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=U16_TRAIL(c);
988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // alternate between -n-1 and -n but leave -1 alone
991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(step<-1) {
992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ++step;
993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else /* step is even */ {
995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // allow only one UChar output
996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                targetLimit=target<resultLimit ? target+1 : resultLimit;
997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit)
999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // and never output offsets
1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(step==-2) {
1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    sourceLimit=bytesLimit;
1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    sourceLimit=source+(-step-2)/2;
1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(sourceLimit>bytesLimit) {
1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        sourceLimit=bytesLimit;
1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ucnv_toUnicode(cnv,
1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    &target, targetLimit,
1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    &source, sourceLimit,
1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode);
1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // check pointers and errors
1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(target!=targetLimit) {
1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        // buffer overflow must only be set when the target is filled
1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else if(targetLimit==resultLimit) {
1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        // not just a partial overflow
1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // the partial target is filled, set a new limit and continue
1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_ZERO_ERROR;
1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else if(U_FAILURE(*pErrorCode)) {
1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // some other error occurred, done
1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(source!=sourceLimit) {
1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        // when no error occurs, then the input must be consumed
1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // we are done (flush==TRUE) but we continue, to get the index out of bounds error above
1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --step;
1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (int32_t)(target-result);
1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool
1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option) {
1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // open the converter
105150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IcuTestErrorCode errorCode(*this, "ToUnicodeCase");
105250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));
105350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(errorCode.isFailure()) {
105485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
105550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());
105650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errorCode.reset();
1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // set the callback
1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(callback!=NULL) {
106250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorCode);
1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_FAILURE(errorCode)) {
1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",
1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return FALSE;
1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t resultOffsets[256];
1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar result[256];
1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t resultLength;
1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool ok;
1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static const struct {
1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t step;
1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const char *name;
1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } steps[]={
1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { 0, "bulk" }, // must be first for offsets to be checked
1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { 1, "step=1" },
1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { 3, "step=3" },
1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { 7, "step=7" },
1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { -1, "getNext" },
1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { -2, "toU(bulk)+getNext" },
1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { -3, "getNext+toU(bulk)" },
1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { -4, "toU(1)+getNext" },
1087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { -5, "getNext+toU(1)" },
1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { -12, "toU(5)+getNext" },
1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { -13, "getNext+toU(5)" },
1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    };
1091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i, step;
1092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ok=TRUE;
1094f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
1095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        step=steps[i].step;
1096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(step<0 && !cc.finalFlush) {
1097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // skip ucnv_getNextUChar() if !finalFlush because
1098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // ucnv_getNextUChar() always implies flush
1099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            continue;
1100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(step!=0) {
1102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // bulk test is first, then offsets are not checked any more
1103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cc.offsets=NULL;
1104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else {
1106f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
1107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1108f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        memset(result, -1, UPRV_LENGTHOF(result));
110950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errorCode.reset();
111050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        resultLength=stepToUnicode(cc, cnv.getAlias(),
1111f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                                result, UPRV_LENGTHOF(result),
1112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                step==0 ? resultOffsets : NULL,
111350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                step, errorCode);
1114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ok=checkToUnicode(
111550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                cc, cnv.getAlias(), steps[i].name,
1116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                result, resultLength,
1117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.offsets!=NULL ? resultOffsets : NULL,
1118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                errorCode);
111950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(errorCode.isFailure() || !cc.finalFlush) {
1120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // reset if an error occurred or we did not flush
1121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // otherwise do nothing to make sure that flushing resets
112250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ucnv_resetToUnicode(cnv.getAlias());
1123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
112450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (cc.offsets != NULL && resultOffsets[resultLength] != -1) {
1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln("toUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
1126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.caseNr, cc.charset, resultLength);
1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (result[resultLength] != (UChar)-1) {
1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln("toUnicode[%d](%s) Conversion wrote too much to result at index %d",
1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.caseNr, cc.charset, resultLength);
1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // not a real loop, just a convenience for breaking out of the block
1135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(ok && cc.finalFlush) {
1136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // test ucnv_toUChars()
1137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        memset(result, 0, sizeof(result));
1138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
113950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errorCode.reset();
114050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        resultLength=ucnv_toUChars(cnv.getAlias(),
1141f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                        result, UPRV_LENGTHOF(result),
1142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        (const char *)cc.bytes, cc.bytesLength,
114350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        errorCode);
1144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ok=checkToUnicode(
114550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                cc, cnv.getAlias(), "toUChars",
1146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                result, resultLength,
1147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                NULL,
1148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                errorCode);
1149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(!ok) {
1150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // test preflighting
1154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // keep the correct result for simple checking
115550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errorCode.reset();
115650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        resultLength=ucnv_toUChars(cnv.getAlias(),
1157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        NULL, 0,
1158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        (const char *)cc.bytes, cc.bytesLength,
115950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        errorCode);
116050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING || errorCode.get()==U_BUFFER_OVERFLOW_ERROR) {
116150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errorCode.reset();
1162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ok=checkToUnicode(
116450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                cc, cnv.getAlias(), "preflight toUChars",
1165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                result, resultLength,
1166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                NULL,
1167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                errorCode);
1168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        break;
1169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
117150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    errorCode.reset();  // all errors have already been reported
1172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return ok;
1173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool
1176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruConversionTest::checkToUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
1177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                               const UChar *result, int32_t resultLength,
1178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                               const int32_t *resultOffsets,
1179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                               UErrorCode resultErrorCode) {
1180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char resultInvalidChars[8];
1181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t resultInvalidLength;
1182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode errorCode;
1183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char *msg;
1185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // reset the message; NULL will mean "ok"
1187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    msg=NULL;
1188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
1190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    resultInvalidLength=sizeof(resultInvalidChars);
1191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCode);
1192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
1193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s",
1194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
1195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
1196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // check everything that might have gone wrong
1199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cc.unicodeLength!=resultLength) {
1200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        msg="wrong result length";
1201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) {
1202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        msg="wrong result string";
1203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicodeLength*sizeof(*cc.offsets))) {
1204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        msg="wrong offsets";
1205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(cc.outErrorCode!=resultErrorCode) {
1206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        msg="wrong error code";
1207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(cc.invalidLength!=resultInvalidLength) {
1208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        msg="wrong length of last invalid input";
1209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength)) {
1210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        msg="wrong last invalid input";
1211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(msg==NULL) {
1214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
1215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        char buffer[2000]; // one buffer for all strings
1217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        char *s, *bytesString, *unicodeString, *resultString,
1218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *offsetsString, *resultOffsetsString,
1219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *invalidCharsString, *resultInvalidCharsString;
1220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        bytesString=s=buffer;
1222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printBytes(cc.bytes, cc.bytesLength, bytesString);
1223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s);
1224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printUnicode(result, resultLength, resultString=s);
1225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s);
1226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
1227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s);
1228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultInvalidCharsString=s);
1229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if((s-buffer)>(int32_t)sizeof(buffer)) {
1231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n",
1232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
1233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            exit(1);
1234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
1237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              "  bytes <%s>[%d]\n"
1238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              " expected <%s>[%d]\n"
1239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              "  result  <%s>[%d]\n"
1240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              " offsets         <%s>\n"
1241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              "  result offsets <%s>\n"
1242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              " error code expected %s got %s\n"
1243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              "  invalidChars expected <%s> got <%s>\n",
1244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
1245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              bytesString, cc.bytesLength,
1246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              unicodeString, cc.unicodeLength,
1247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              resultString, resultLength,
1248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              offsetsString,
1249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              resultOffsetsString,
1250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
1251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              invalidCharsString, resultInvalidCharsString);
1252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
1254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// fromUnicode test worker functions --------------------------------------- ***
1258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t
1260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustepFromUTF8(ConversionCase &cc,
1261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             UConverter *utf8Cnv, UConverter *cnv,
1262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             char *result, int32_t resultCapacity,
1263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             int32_t step,
1264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             UErrorCode *pErrorCode) {
1265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char *source, *sourceLimit, *utf8Limit;
1266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar pivotBuffer[32];
1267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar *pivotSource, *pivotTarget, *pivotLimit;
1268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char *target, *targetLimit, *resultLimit;
1269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool flush;
1270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=cc.utf8;
1272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pivotSource=pivotTarget=pivotBuffer;
1273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=result;
1274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf8Limit=source+cc.utf8Length;
1275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    resultLimit=result+resultCapacity;
1276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // call ucnv_convertEx() with in/out buffers no larger than (step) at a time
1278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // move only one buffer (in vs. out) at a time to be extra mean
1279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // step==0 performs bulk conversion
1280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // initialize the partial limits for the loop
1282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(step==0) {
1283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // use the entire buffers
1284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sourceLimit=utf8Limit;
1285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetLimit=resultLimit;
1286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        flush=cc.finalFlush;
1287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1288f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer);
1289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // start with empty partial buffers
1291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sourceLimit=source;
1292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetLimit=target;
1293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        flush=FALSE;
1294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // empty pivot is not allowed, make it of length step
1296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pivotLimit=pivotBuffer+step;
1297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(;;) {
1300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // resetting the opposite conversion direction must not affect this one
1301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_resetFromUnicode(utf8Cnv);
1302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_resetToUnicode(cnv);
1303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // convert
1305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_convertEx(cnv, utf8Cnv,
1306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            &target, targetLimit,
1307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            &source, sourceLimit,
1308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
1309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            FALSE, flush, pErrorCode);
1310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // check pointers and errors
1312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(source>sourceLimit || target>targetLimit) {
1313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(target!=targetLimit) {
1317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // buffer overflow must only be set when the target is filled
1318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(targetLimit==resultLimit) {
1321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // not just a partial overflow
1322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // the partial target is filled, set a new limit, reset the error and continue
1326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
1327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_ZERO_ERROR;
1328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(U_FAILURE(*pErrorCode)) {
1329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(pivotSource==pivotBuffer) {
1330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // toUnicode error, should not occur
1331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
1332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
1334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // fromUnicode error
1335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // some other error occurred, done
1336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
1339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(source!=sourceLimit) {
1340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // when no error occurs, then the input must be consumed
1341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(sourceLimit==utf8Limit) {
1346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // we are done
1347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
1348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // ucnv_convertEx() warns about not terminating the output
1349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // but ucnv_fromUnicode() does not and so
1350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    // checkFromUnicode() does not expect it
1351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *pErrorCode=U_ZERO_ERROR;
1352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // the partial conversion succeeded, set a new limit and continue
1357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit;
1358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit);
1359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (int32_t)(target-result);
1363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t
1366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustepFromUnicode(ConversionCase &cc, UConverter *cnv,
1367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                char *result, int32_t resultCapacity,
1368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                int32_t *resultOffsets, /* also resultCapacity */
1369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                int32_t step,
1370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                UErrorCode *pErrorCode) {
1371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *source, *sourceLimit, *unicodeLimit;
1372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char *target, *targetLimit, *resultLimit;
1373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool flush;
1374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=cc.unicode;
1376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=result;
1377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    unicodeLimit=source+cc.unicodeLength;
1378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    resultLimit=result+resultCapacity;
1379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time
1381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // move only one buffer (in vs. out) at a time to be extra mean
1382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // step==0 performs bulk conversion and generates offsets
1383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // initialize the partial limits for the loop
1385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(step==0) {
1386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // use the entire buffers
1387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sourceLimit=unicodeLimit;
1388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetLimit=resultLimit;
1389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        flush=cc.finalFlush;
1390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // start with empty partial buffers
1392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sourceLimit=source;
1393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        targetLimit=target;
1394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        flush=FALSE;
1395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // output offsets only for bulk conversion
1397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        resultOffsets=NULL;
1398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(;;) {
1401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // resetting the opposite conversion direction must not affect this one
1402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_resetToUnicode(cnv);
1403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // convert
1405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_fromUnicode(cnv,
1406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            &target, targetLimit,
1407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            &source, sourceLimit,
1408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            resultOffsets,
1409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            flush, pErrorCode);
1410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // check pointers and errors
1412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(source>sourceLimit || target>targetLimit) {
1413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(target!=targetLimit) {
1417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // buffer overflow must only be set when the target is filled
1418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(targetLimit==resultLimit) {
1421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // not just a partial overflow
1422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // the partial target is filled, set a new limit, reset the error and continue
1426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
1427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_ZERO_ERROR;
1428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(U_FAILURE(*pErrorCode)) {
1429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // some other error occurred, done
1430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
1432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(source!=sourceLimit) {
1433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // when no error occurs, then the input must be consumed
1434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
1435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(sourceLimit==unicodeLimit) {
1439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // we are done
1440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
1441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // the partial conversion succeeded, set a new limit and continue
1444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit;
1445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit);
1446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (int32_t)(target-result);
1450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool
1453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback callback, const char *option) {
1454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv;
1455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode errorCode;
1456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // open the converter
1458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
1459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv=cnv_open(cc.charset, errorCode);
1460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
146185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
1462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
1464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_resetToUnicode(utf8Cnv);
1466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // set the callback
1468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(callback!=NULL) {
1469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode);
1470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_FAILURE(errorCode)) {
1471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s",
1472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_close(cnv);
1474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return FALSE;
1475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // set the fallbacks flag
1479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // TODO change with Jitterbug 2401, then add a similar call for toUnicode too
1480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_setFallback(cnv, cc.fallbacks);
1481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // set the subchar
1483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t length;
1484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cc.setSub>0) {
1486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        length=(int32_t)strlen(cc.subchar);
1487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode);
1488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_FAILURE(errorCode)) {
1489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s",
1490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_close(cnv);
1492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return FALSE;
1493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(cc.setSub<0) {
1495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_setSubstString(cnv, cc.subString, -1, &errorCode);
1496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_FAILURE(errorCode)) {
1497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s",
1498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
1499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_close(cnv);
1500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return FALSE;
1501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // convert unicode to utf8
1505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char utf8[256];
1506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cc.utf8=utf8;
1507f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length,
1508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.unicode, cc.unicodeLength,
1509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                &errorCode);
1510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
1511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // skip UTF-8 testing of a string with an unpaired surrogate,
1512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // or of one that's too long
1513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
1514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cc.utf8Length=-1;
1515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t resultOffsets[256];
1518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char result[256];
1519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t resultLength;
1520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool ok;
1521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static const struct {
1523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t step;
1524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const char *name, *utf8Name;
1525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } steps[]={
1526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { 0, "bulk",   "utf8" }, // must be first for offsets to be checked
1527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { 1, "step=1", "utf8 step=1" },
1528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { 3, "step=3", "utf8 step=3" },
1529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        { 7, "step=7", "utf8 step=7" }
1530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    };
1531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i, step;
1532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ok=TRUE;
1534f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
1535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        step=steps[i].step;
1536f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
1537f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        memset(result, -1, UPRV_LENGTHOF(result));
1538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        errorCode=U_ZERO_ERROR;
1539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        resultLength=stepFromUnicode(cc, cnv,
1540f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                                result, UPRV_LENGTHOF(result),
1541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                step==0 ? resultOffsets : NULL,
1542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                step, &errorCode);
1543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ok=checkFromUnicode(
1544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc, cnv, steps[i].name,
1545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (uint8_t *)result, resultLength,
1546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.offsets!=NULL ? resultOffsets : NULL,
1547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                errorCode);
1548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_FAILURE(errorCode) || !cc.finalFlush) {
1549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // reset if an error occurred or we did not flush
1550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // otherwise do nothing to make sure that flushing resets
1551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_resetFromUnicode(cnv);
1552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (resultOffsets[resultLength] != -1) {
1554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
1555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.caseNr, cc.charset, resultLength);
1556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (result[resultLength] != (char)-1) {
1558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln("fromUnicode[%d](%s) Conversion wrote too much to result at index %d",
1559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.caseNr, cc.charset, resultLength);
1560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // bulk test is first, then offsets are not checked any more
1563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cc.offsets=NULL;
1564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // test direct conversion from UTF-8
1566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(cc.utf8Length>=0) {
1567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errorCode=U_ZERO_ERROR;
1568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            resultLength=stepFromUTF8(cc, utf8Cnv, cnv,
1569f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                                    result, UPRV_LENGTHOF(result),
1570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                    step, &errorCode);
1571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ok=checkFromUnicode(
1572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc, cnv, steps[i].utf8Name,
1573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    (uint8_t *)result, resultLength,
1574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    NULL,
1575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errorCode);
1576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(U_FAILURE(errorCode) || !cc.finalFlush) {
1577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // reset if an error occurred or we did not flush
1578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // otherwise do nothing to make sure that flushing resets
1579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ucnv_resetToUnicode(utf8Cnv);
1580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ucnv_resetFromUnicode(cnv);
1581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // not a real loop, just a convenience for breaking out of the block
1586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(ok && cc.finalFlush) {
1587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // test ucnv_fromUChars()
1588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        memset(result, 0, sizeof(result));
1589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        errorCode=U_ZERO_ERROR;
1591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        resultLength=ucnv_fromUChars(cnv,
1592f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                        result, UPRV_LENGTHOF(result),
1593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cc.unicode, cc.unicodeLength,
1594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        &errorCode);
1595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ok=checkFromUnicode(
1596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc, cnv, "fromUChars",
1597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (uint8_t *)result, resultLength,
1598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                NULL,
1599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                errorCode);
1600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(!ok) {
1601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
1602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // test preflighting
1605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // keep the correct result for simple checking
1606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        errorCode=U_ZERO_ERROR;
1607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        resultLength=ucnv_fromUChars(cnv,
1608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        NULL, 0,
1609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cc.unicode, cc.unicodeLength,
1610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        &errorCode);
1611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVERFLOW_ERROR) {
1612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errorCode=U_ZERO_ERROR;
1613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ok=checkFromUnicode(
1615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc, cnv, "preflight fromUChars",
1616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (uint8_t *)result, resultLength,
1617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                NULL,
1618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                errorCode);
1619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        break;
1620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_close(cnv);
1623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return ok;
1624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool
1627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruConversionTest::checkFromUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
1628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 const uint8_t *result, int32_t resultLength,
1629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 const int32_t *resultOffsets,
1630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 UErrorCode resultErrorCode) {
1631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar resultInvalidUChars[8];
1632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t resultInvalidLength;
1633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode errorCode;
1634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char *msg;
1636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // reset the message; NULL will mean "ok"
1638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    msg=NULL;
1639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errorCode=U_ZERO_ERROR;
1641f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars);
1642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);
1643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
1644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",
1645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
1646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
1647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // check everything that might have gone wrong
1650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cc.bytesLength!=resultLength) {
1651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        msg="wrong result length";
1652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) {
1653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        msg="wrong result string";
1654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesLength*sizeof(*cc.offsets))) {
1655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        msg="wrong offsets";
1656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(cc.outErrorCode!=resultErrorCode) {
1657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        msg="wrong error code";
1658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(cc.invalidLength!=resultInvalidLength) {
1659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        msg="wrong length of last invalid input";
1660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLength)) {
1661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        msg="wrong last invalid input";
1662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(msg==NULL) {
1665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
1666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        char buffer[2000]; // one buffer for all strings
1668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        char *s, *unicodeString, *bytesString, *resultString,
1669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *offsetsString, *resultOffsetsString,
1670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *invalidCharsString, *resultInvalidUCharsString;
1671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        unicodeString=s=buffer;
1673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString);
1674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printBytes(cc.bytes, cc.bytesLength, bytesString=s);
1675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printBytes(result, resultLength, resultString=s);
1676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s);
1677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
1678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s);
1679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUCharsString=s);
1680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if((s-buffer)>(int32_t)sizeof(buffer)) {
1682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",
1683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
1684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            exit(1);
1685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
1688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              "  unicode <%s>[%d]\n"
1689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              " expected <%s>[%d]\n"
1690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              "  result  <%s>[%d]\n"
1691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              " offsets         <%s>\n"
1692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              "  result offsets <%s>\n"
1693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              " error code expected %s got %s\n"
1694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              "  invalidChars expected <%s> got <%s>\n",
1695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
1696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              unicodeString, cc.unicodeLength,
1697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              bytesString, cc.bytesLength,
1698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              resultString, resultLength,
1699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              offsetsString,
1700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              resultOffsetsString,
1701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
1702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              invalidCharsString, resultInvalidUCharsString);
1703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
1705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
1709