1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru **********************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   Copyright (C) 2002-2007, International Business Machines
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   Corporation and others.  All Rights Reserved.
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru **********************************************************************
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   file name:  utfperf.cpp
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   encoding:   US-ASCII
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   tab size:   8 (not used)
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   indentation:4
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   created on: 2005Nov17
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   created by: Raymond Yang
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   Ported from utfper.c created by Markus W. Scherer
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   Performance test program for Unicode converters
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h>
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdlib.h>
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uperf.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uoptions.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* definitions and text buffers */
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define INPUT_CAPACITY (1024*1024)
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define INTERMEDIATE_CAPACITY 4096
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define INTERMEDIATE_SMALL_CAPACITY 20
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define PIVOT_CAPACITY 1024
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define OUTPUT_CAPACITY INPUT_CAPACITY
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic char utf8[INPUT_CAPACITY];
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar pivot[INTERMEDIATE_CAPACITY];
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar output[OUTPUT_CAPACITY];
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic char intermediate[OUTPUT_CAPACITY];
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t fromUCallbackCount;
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Command-line options specific to utfperf.
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Options do not have abbreviations: Force readable command lines.
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// (Using U+0001 for abbreviation characters.)
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum {
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    CHARSET,
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    CHUNK_LENGTH,
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    PIVOT_LENGTH,
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UTFPERF_OPTIONS_COUNT
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UOption options[UTFPERF_OPTIONS_COUNT]={
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UOPTION_DEF("charset",  '\x01', UOPT_REQUIRES_ARG),
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UOPTION_DEF("chunk",    '\x01', UOPT_REQUIRES_ARG),
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UOPTION_DEF("pivot",    '\x01', UOPT_REQUIRES_ARG)
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// Help text for the utfperf-specific options; passed to the UPerfTest
// constructor. Each row is "<tab><option column>" followed by its description.
static const char *const utfperf_usage =
    "\t--charset   " "Charset for which to test performance, e.g. windows-1251.\n"
    "\t            " "Default: UTF-8\n"
    "\t--chunk     " "Length (in bytes) of charset output chunks. [4096]\n"
    "\t--pivot     " "Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
    "\t            " "[1024]\n";
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Test object.
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass  UtfPerformanceTest : public UPerfTest{
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            : UPerfTest(argc, argv, options, LENGTHOF(options), utfperf_usage, status) {
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (U_SUCCESS(status)) {
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            charset = options[CHARSET].value;
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            chunkLength = atoi(options[CHUNK_LENGTH].value);
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                status = U_ILLEGAL_ARGUMENT_ERROR;
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pivotLength = atoi(options[PIVOT_LENGTH].value);
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                status = U_ILLEGAL_ARGUMENT_ERROR;
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t inputLength;
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UPerfTest::getBuffer(inputLength, status);
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            countInputCodePoints = u_countChar32(buffer, bufferLen);
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *getBuffer() const { return buffer; }
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t getBufferLen() const { return bufferLen; }
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char *charset;
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t chunkLength, pivotLength;
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Custom callback for counting callback calls.
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufromUCallback(const void *context,
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              UConverterFromUnicodeArgs *fromUArgs,
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              const UChar *codeUnits,
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              int32_t length,
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              UChar32 codePoint,
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              UConverterCallbackReason reason,
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              UErrorCode *pErrorCode) {
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (reason <= UCNV_IRREGULAR) {
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++fromUCallbackCount;
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass Command : public UPerfFunction {
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprotected:
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Command(const UtfPerformanceTest &testcase)
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            : testcase(testcase),
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              errorCode(U_ZERO_ERROR) {
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv=ucnv_open(testcase.charset, &errorCode);
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (U_FAILURE(errorCode)) {
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual ~Command(){
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_SUCCESS(errorCode)) {
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_close(cnv);
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // virtual void call(UErrorCode* pErrorCode) { ... }
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual long getOperationsPerIteration(){
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return countInputCodePoints;
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UtfPerformanceTest &testcase;
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *input;
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t inputLength;
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode errorCode;
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv;
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Test roundtrip UTF-16->encoding->UTF-16.
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass Roundtrip : public Command {
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprotected:
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static UPerfFunction* get(const UtfPerformanceTest &testcase) {
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        Roundtrip * t = new Roundtrip(testcase);
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (U_SUCCESS(t->errorCode)){
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return t;
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            delete t;
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return NULL;
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual void call(UErrorCode* pErrorCode){
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const UChar *pIn, *pInLimit;
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar *pOut, *pOutLimit;
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        char *pInter, *pInterLimit;
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const char *p;
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UBool flush;
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_reset(cnv);
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fromUCallbackCount=0;
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pIn=input;
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pInLimit=input+inputLength;
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pOut=output;
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pOutLimit=output+OUTPUT_CAPACITY;
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pInterLimit=intermediate+testcase.chunkLength;
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        encodedLength=outputLength=0;
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        flush=FALSE;
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        do {
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pInter=intermediate;
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            encodedLength+=(int32_t)(pInter-intermediate);
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* make sure that we convert once more to really flush */
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_ZERO_ERROR;
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(U_FAILURE(*pErrorCode)) {
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return;
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(pIn==pInLimit) {
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                flush=TRUE;
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* convert the block [intermediate..pInter[ back to UTF-16 */
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            p=intermediate;
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(U_FAILURE(*pErrorCode)) {
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return;
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* intermediate must have been consumed (p==pInter) because of the converter semantics */
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } while(!flush);
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        outputLength=pOut-output;
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(inputLength!=outputLength) {
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Test one-way conversion UTF-16->encoding.
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass FromUnicode : public Command {
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprotected:
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static UPerfFunction* get(const UtfPerformanceTest &testcase) {
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        FromUnicode * t = new FromUnicode(testcase);
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (U_SUCCESS(t->errorCode)){
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return t;
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            delete t;
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return NULL;
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual void call(UErrorCode* pErrorCode){
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const UChar *pIn, *pInLimit;
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        char *pInter, *pInterLimit;
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_resetFromUnicode(cnv);
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fromUCallbackCount=0;
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pIn=input;
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pInLimit=input+inputLength;
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pInterLimit=intermediate+testcase.chunkLength;
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        encodedLength=0;
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(;;) {
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pInter=intermediate;
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            encodedLength+=(int32_t)(pInter-intermediate);
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* make sure that we convert once more to really flush */
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_ZERO_ERROR;
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(U_FAILURE(*pErrorCode)) {
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return;
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;  // all done
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Test one-way conversion UTF-8->encoding.
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass FromUTF8 : public Command {
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprotected:
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    FromUTF8(const UtfPerformanceTest &testcase)
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            : Command(testcase),
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              utf8Cnv(NULL),
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              input8(utf8), input8Length(utf8Length) {
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        utf8Cnv=ucnv_open("UTF-8", &errorCode);
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static UPerfFunction* get(const UtfPerformanceTest &testcase) {
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        FromUTF8 * t = new FromUTF8(testcase);
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (U_SUCCESS(t->errorCode)){
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return t;
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            delete t;
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return NULL;
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ~FromUTF8() {
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_close(utf8Cnv);
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual void call(UErrorCode* pErrorCode){
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const char *pIn, *pInLimit;
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        char *pInter, *pInterLimit;
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar *pivotSource, *pivotTarget, *pivotLimit;
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_resetToUnicode(utf8Cnv);
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_resetFromUnicode(cnv);
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fromUCallbackCount=0;
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pIn=input8;
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pInLimit=input8+input8Length;
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pInterLimit=intermediate+testcase.chunkLength;
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pivotSource=pivotTarget=pivot;
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pivotLimit=pivot+testcase.pivotLength;
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        encodedLength=0;
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(;;) {
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pInter=intermediate;
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_convertEx(cnv, utf8Cnv,
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           &pInter, pInterLimit,
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           &pIn, pInLimit,
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           pivot, &pivotSource, &pivotTarget, pivotLimit,
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           FALSE, TRUE, pErrorCode);
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            encodedLength+=(int32_t)(pInter-intermediate);
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* make sure that we convert once more to really flush */
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_ZERO_ERROR;
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(U_FAILURE(*pErrorCode)) {
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return;
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;  // all done
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprotected:
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *utf8Cnv;
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char *input8;
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t input8Length;
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    switch (index) {
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 0: name = "Roundtrip";     if (exec) return Roundtrip::get(*this); break;
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 1: name = "FromUnicode";   if (exec) return FromUnicode::get(*this); break;
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 2: name = "FromUTF8";      if (exec) return FromUTF8::get(*this); break;
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        default: name = ""; break;
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return NULL;
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint main(int argc, const char *argv[])
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Default values for command-line options.
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    options[CHARSET].value = "UTF-8";
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    options[CHUNK_LENGTH].value = "4096";
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    options[PIVOT_LENGTH].value = "1024";
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UtfPerformanceTest test(argc, argv, status);
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru	if (U_FAILURE(status)){
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("The error is %s\n", u_errorName(status));
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        test.usage();
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return status;
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (test.run() == FALSE){
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fprintf(stderr, "FAILED: Tests could not be run please check the "
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru			            "arguments.\n");
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return -1;
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fromUCallbackCount > 0) {
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0;
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
366