1/*
2 **********************************************************************
3 *   Copyright (C) 2002-2007, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   file name:  utfperf.cpp
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2005Nov17
12 *   created by: Raymond Yang
13 *
14 *   Ported from utfper.c created by Markus W. Scherer
15 *   Performance test program for Unicode converters
16 */
17
18#include <stdio.h>
19#include <stdlib.h>
20#include "unicode/uperf.h"
21#include "uoptions.h"
22
// Number of elements in a real array (not a pointer), cast to int32_t.
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
24
/* definitions and text buffers */

// Size in bytes of utf8[], the UTF-8 copy of the input text.
#define INPUT_CAPACITY (1024*1024)
// Size in UChars of pivot[].
#define INTERMEDIATE_CAPACITY 4096
// NOTE(review): not referenced anywhere in the visible part of this file.
#define INTERMEDIATE_SMALL_CAPACITY 20
// Largest value accepted for --pivot; smaller than pivot[] so any accepted length fits.
#define PIVOT_CAPACITY 1024
// Size of output[] (UChars) and intermediate[] (bytes); also the largest value accepted for --chunk.
#define OUTPUT_CAPACITY INPUT_CAPACITY

// UTF-8 form of the input, filled by the UtfPerformanceTest constructor and read by FromUTF8.
static char utf8[INPUT_CAPACITY];
// UTF-16 pivot buffer handed to ucnv_convertEx() by FromUTF8.
static UChar pivot[INTERMEDIATE_CAPACITY];

// Destination of the charset->UTF-16 half of Roundtrip.
static UChar output[OUTPUT_CAPACITY];
// Destination for charset bytes in all three tests; only the first chunkLength bytes are used per pass.
static char intermediate[OUTPUT_CAPACITY];

// utf8Length:           number of bytes stored in utf8[] (set by the UtfPerformanceTest constructor).
// encodedLength:        charset bytes produced by the most recent call() of a test.
// outputLength:         UChars written to output[] by the most recent Roundtrip::call().
// countInputCodePoints: code points in the input; reported as operations per iteration.
static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;

// Incremented by fromUCallback(), reset at the start of every call(), printed by main().
static int32_t fromUCallbackCount;
42
43// Command-line options specific to utfperf.
44// Options do not have abbreviations: Force readable command lines.
45// (Using U+0001 for abbreviation characters.)
// Indexes into options[]; the order here must match the order of the
// initializers of options[].
enum {
    CHARSET,                // --charset
    CHUNK_LENGTH,           // --chunk
    PIVOT_LENGTH,           // --pivot
    UTFPERF_OPTIONS_COUNT   // number of entries; sizes options[]
};
52
// Option table passed to the UPerfTest constructor. Entries are looked up by
// the enum constants above, so they must stay in the same order. Every option
// takes an argument; '\x01' is the U+0001 placeholder used instead of a real
// one-letter abbreviation. main() stores the default values before parsing.
static UOption options[UTFPERF_OPTIONS_COUNT]={
    UOPTION_DEF("charset",  '\x01', UOPT_REQUIRES_ARG),
    UOPTION_DEF("chunk",    '\x01', UOPT_REQUIRES_ARG),
    UOPTION_DEF("pivot",    '\x01', UOPT_REQUIRES_ARG)
};
58
// Help text for the utfperf-specific options; handed to the UPerfTest
// constructor. The defaults quoted here are the ones main() stores in
// options[] before the command line is parsed, so keep the two in sync.
static const char *const utfperf_usage =
    "\t--charset   Charset for which to test performance, e.g. windows-1251.\n"
    "\t            Default: UTF-8\n"
    "\t--chunk     Length (in bytes) of charset output chunks. [4096]\n"
    "\t--pivot     Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
    "\t            [1024]\n";
65
66// Test object.
67class  UtfPerformanceTest : public UPerfTest{
68public:
69    UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
70            : UPerfTest(argc, argv, options, LENGTHOF(options), utfperf_usage, status) {
71        if (U_SUCCESS(status)) {
72            charset = options[CHARSET].value;
73
74            chunkLength = atoi(options[CHUNK_LENGTH].value);
75            if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
76                fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
77                status = U_ILLEGAL_ARGUMENT_ERROR;
78            }
79
80            pivotLength = atoi(options[PIVOT_LENGTH].value);
81            if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
82                fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
83                status = U_ILLEGAL_ARGUMENT_ERROR;
84            }
85
86            int32_t inputLength;
87            UPerfTest::getBuffer(inputLength, status);
88            countInputCodePoints = u_countChar32(buffer, bufferLen);
89            u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
90        }
91    }
92
93    virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
94
95    const UChar *getBuffer() const { return buffer; }
96    int32_t getBufferLen() const { return bufferLen; }
97
98    const char *charset;
99    int32_t chunkLength, pivotLength;
100};
101
102U_CDECL_BEGIN
103// Custom callback for counting callback calls.
104static void U_CALLCONV
105fromUCallback(const void *context,
106              UConverterFromUnicodeArgs *fromUArgs,
107              const UChar *codeUnits,
108              int32_t length,
109              UChar32 codePoint,
110              UConverterCallbackReason reason,
111              UErrorCode *pErrorCode) {
112    if (reason <= UCNV_IRREGULAR) {
113        ++fromUCallbackCount;
114    }
115    UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
116}
117U_CDECL_END
118
119// Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
120class Command : public UPerfFunction {
121protected:
122    Command(const UtfPerformanceTest &testcase)
123            : testcase(testcase),
124              input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
125              errorCode(U_ZERO_ERROR) {
126        cnv=ucnv_open(testcase.charset, &errorCode);
127        if (U_FAILURE(errorCode)) {
128            fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
129        }
130        ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
131    }
132public:
133    virtual ~Command(){
134        if(U_SUCCESS(errorCode)) {
135            ucnv_close(cnv);
136        }
137    }
138    // virtual void call(UErrorCode* pErrorCode) { ... }
139    virtual long getOperationsPerIteration(){
140        return countInputCodePoints;
141    }
142
143    const UtfPerformanceTest &testcase;
144    const UChar *input;
145    int32_t inputLength;
146    UErrorCode errorCode;
147    UConverter *cnv;
148};
149
150// Test roundtrip UTF-16->encoding->UTF-16.
151class Roundtrip : public Command {
152protected:
153    Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
154public:
155    static UPerfFunction* get(const UtfPerformanceTest &testcase) {
156        Roundtrip * t = new Roundtrip(testcase);
157        if (U_SUCCESS(t->errorCode)){
158            return t;
159        } else {
160            delete t;
161            return NULL;
162        }
163    }
164    virtual void call(UErrorCode* pErrorCode){
165        const UChar *pIn, *pInLimit;
166        UChar *pOut, *pOutLimit;
167        char *pInter, *pInterLimit;
168        const char *p;
169        UBool flush;
170
171        ucnv_reset(cnv);
172        fromUCallbackCount=0;
173
174        pIn=input;
175        pInLimit=input+inputLength;
176
177        pOut=output;
178        pOutLimit=output+OUTPUT_CAPACITY;
179
180        pInterLimit=intermediate+testcase.chunkLength;
181
182        encodedLength=outputLength=0;
183        flush=FALSE;
184
185        do {
186            /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
187            pInter=intermediate;
188            ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
189            encodedLength+=(int32_t)(pInter-intermediate);
190
191            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
192                /* make sure that we convert once more to really flush */
193                *pErrorCode=U_ZERO_ERROR;
194            } else if(U_FAILURE(*pErrorCode)) {
195                return;
196            } else if(pIn==pInLimit) {
197                flush=TRUE;
198            }
199
200            /* convert the block [intermediate..pInter[ back to UTF-16 */
201            p=intermediate;
202            ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
203            if(U_FAILURE(*pErrorCode)) {
204                return;
205            }
206            /* intermediate must have been consumed (p==pInter) because of the converter semantics */
207        } while(!flush);
208
209        outputLength=pOut-output;
210        if(inputLength!=outputLength) {
211            fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
212            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
213        }
214    }
215};
216
217// Test one-way conversion UTF-16->encoding.
218class FromUnicode : public Command {
219protected:
220    FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
221public:
222    static UPerfFunction* get(const UtfPerformanceTest &testcase) {
223        FromUnicode * t = new FromUnicode(testcase);
224        if (U_SUCCESS(t->errorCode)){
225            return t;
226        } else {
227            delete t;
228            return NULL;
229        }
230    }
231    virtual void call(UErrorCode* pErrorCode){
232        const UChar *pIn, *pInLimit;
233        char *pInter, *pInterLimit;
234
235        ucnv_resetFromUnicode(cnv);
236        fromUCallbackCount=0;
237
238        pIn=input;
239        pInLimit=input+inputLength;
240
241        pInterLimit=intermediate+testcase.chunkLength;
242
243        encodedLength=0;
244
245        for(;;) {
246            pInter=intermediate;
247            ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
248            encodedLength+=(int32_t)(pInter-intermediate);
249
250            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
251                /* make sure that we convert once more to really flush */
252                *pErrorCode=U_ZERO_ERROR;
253            } else if(U_FAILURE(*pErrorCode)) {
254                return;
255            } else {
256                break;  // all done
257            }
258        }
259    }
260};
261
262// Test one-way conversion UTF-8->encoding.
263class FromUTF8 : public Command {
264protected:
265    FromUTF8(const UtfPerformanceTest &testcase)
266            : Command(testcase),
267              utf8Cnv(NULL),
268              input8(utf8), input8Length(utf8Length) {
269        utf8Cnv=ucnv_open("UTF-8", &errorCode);
270    }
271public:
272    static UPerfFunction* get(const UtfPerformanceTest &testcase) {
273        FromUTF8 * t = new FromUTF8(testcase);
274        if (U_SUCCESS(t->errorCode)){
275            return t;
276        } else {
277            delete t;
278            return NULL;
279        }
280    }
281    ~FromUTF8() {
282        ucnv_close(utf8Cnv);
283    }
284    virtual void call(UErrorCode* pErrorCode){
285        const char *pIn, *pInLimit;
286        char *pInter, *pInterLimit;
287        UChar *pivotSource, *pivotTarget, *pivotLimit;
288
289        ucnv_resetToUnicode(utf8Cnv);
290        ucnv_resetFromUnicode(cnv);
291        fromUCallbackCount=0;
292
293        pIn=input8;
294        pInLimit=input8+input8Length;
295
296        pInterLimit=intermediate+testcase.chunkLength;
297
298        pivotSource=pivotTarget=pivot;
299        pivotLimit=pivot+testcase.pivotLength;
300
301        encodedLength=0;
302
303        for(;;) {
304            pInter=intermediate;
305            ucnv_convertEx(cnv, utf8Cnv,
306                           &pInter, pInterLimit,
307                           &pIn, pInLimit,
308                           pivot, &pivotSource, &pivotTarget, pivotLimit,
309                           FALSE, TRUE, pErrorCode);
310            encodedLength+=(int32_t)(pInter-intermediate);
311
312            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
313                /* make sure that we convert once more to really flush */
314                *pErrorCode=U_ZERO_ERROR;
315            } else if(U_FAILURE(*pErrorCode)) {
316                return;
317            } else {
318                break;  // all done
319            }
320        }
321    }
322protected:
323    UConverter *utf8Cnv;
324    const char *input8;
325    int32_t input8Length;
326};
327
328UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
329    switch (index) {
330        case 0: name = "Roundtrip";     if (exec) return Roundtrip::get(*this); break;
331        case 1: name = "FromUnicode";   if (exec) return FromUnicode::get(*this); break;
332        case 2: name = "FromUTF8";      if (exec) return FromUTF8::get(*this); break;
333        default: name = ""; break;
334    }
335    return NULL;
336}
337
338int main(int argc, const char *argv[])
339{
340    // Default values for command-line options.
341    options[CHARSET].value = "UTF-8";
342    options[CHUNK_LENGTH].value = "4096";
343    options[PIVOT_LENGTH].value = "1024";
344
345    UErrorCode status = U_ZERO_ERROR;
346    UtfPerformanceTest test(argc, argv, status);
347
348	if (U_FAILURE(status)){
349        printf("The error is %s\n", u_errorName(status));
350        test.usage();
351        return status;
352    }
353
354    if (test.run() == FALSE){
355        fprintf(stderr, "FAILED: Tests could not be run please check the "
356			            "arguments.\n");
357        return -1;
358    }
359
360    if (fromUCallbackCount > 0) {
361        printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);
362    }
363
364    return 0;
365}
366