1/*
2 **********************************************************************
3 *   Copyright (C) 2002-2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   file name:  utfperf.cpp
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2005Nov17
12 *   created by: Raymond Yang
13 *
14 *   Ported from utfper.c created by Markus W. Scherer
15 *   Performance test program for Unicode converters
16 */
17
18#include <stdio.h>
19#include <stdlib.h>
20#include "unicode/uperf.h"
21#include "cmemory.h" // for UPRV_LENGTHOF
22#include "uoptions.h"
23
24/* definitions and text buffers */
25
26#define INPUT_CAPACITY (1024*1024)
27#define INTERMEDIATE_CAPACITY 4096
28#define INTERMEDIATE_SMALL_CAPACITY 20
29#define PIVOT_CAPACITY 1024
30#define OUTPUT_CAPACITY INPUT_CAPACITY
31
32static char utf8[INPUT_CAPACITY];
33static UChar pivot[INTERMEDIATE_CAPACITY];
34
35static UChar output[OUTPUT_CAPACITY];
36static char intermediate[OUTPUT_CAPACITY];
37
38static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;
39
40static int32_t fromUCallbackCount;
41
42// Command-line options specific to utfperf.
43// Options do not have abbreviations: Force readable command lines.
44// (Using U+0001 for abbreviation characters.)
45enum {
46    CHARSET,
47    CHUNK_LENGTH,
48    PIVOT_LENGTH,
49    UTFPERF_OPTIONS_COUNT
50};
51
52static UOption options[UTFPERF_OPTIONS_COUNT]={
53    UOPTION_DEF("charset",  '\x01', UOPT_REQUIRES_ARG),
54    UOPTION_DEF("chunk",    '\x01', UOPT_REQUIRES_ARG),
55    UOPTION_DEF("pivot",    '\x01', UOPT_REQUIRES_ARG)
56};
57
58static const char *const utfperf_usage =
59    "\t--charset   Charset for which to test performance, e.g. windows-1251.\n"
60    "\t            Default: UTF-8\n"
61    "\t--chunk     Length (in bytes) of charset output chunks. [4096]\n"
62    "\t--pivot     Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
63    "\t            [1024]\n";
64
65// Test object.
66class  UtfPerformanceTest : public UPerfTest{
67public:
68    UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
69            : UPerfTest(argc, argv, options, UPRV_LENGTHOF(options), utfperf_usage, status) {
70        if (U_SUCCESS(status)) {
71            charset = options[CHARSET].value;
72
73            chunkLength = atoi(options[CHUNK_LENGTH].value);
74            if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
75                fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
76                status = U_ILLEGAL_ARGUMENT_ERROR;
77            }
78
79            pivotLength = atoi(options[PIVOT_LENGTH].value);
80            if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
81                fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
82                status = U_ILLEGAL_ARGUMENT_ERROR;
83            }
84
85            int32_t inputLength;
86            UPerfTest::getBuffer(inputLength, status);
87            countInputCodePoints = u_countChar32(buffer, bufferLen);
88            u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
89        }
90    }
91
92    virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
93
94    const UChar *getBuffer() const { return buffer; }
95    int32_t getBufferLen() const { return bufferLen; }
96
97    const char *charset;
98    int32_t chunkLength, pivotLength;
99};
100
101U_CDECL_BEGIN
102// Custom callback for counting callback calls.
103static void U_CALLCONV
104fromUCallback(const void *context,
105              UConverterFromUnicodeArgs *fromUArgs,
106              const UChar *codeUnits,
107              int32_t length,
108              UChar32 codePoint,
109              UConverterCallbackReason reason,
110              UErrorCode *pErrorCode) {
111    if (reason <= UCNV_IRREGULAR) {
112        ++fromUCallbackCount;
113    }
114    UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
115}
116U_CDECL_END
117
118// Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
119class Command : public UPerfFunction {
120protected:
121    Command(const UtfPerformanceTest &testcase)
122            : testcase(testcase),
123              input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
124              errorCode(U_ZERO_ERROR) {
125        cnv=ucnv_open(testcase.charset, &errorCode);
126        if (U_FAILURE(errorCode)) {
127            fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
128        }
129        ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
130    }
131public:
132    virtual ~Command(){
133        if(U_SUCCESS(errorCode)) {
134            ucnv_close(cnv);
135        }
136    }
137    // virtual void call(UErrorCode* pErrorCode) { ... }
138    virtual long getOperationsPerIteration(){
139        return countInputCodePoints;
140    }
141
142    const UtfPerformanceTest &testcase;
143    const UChar *input;
144    int32_t inputLength;
145    UErrorCode errorCode;
146    UConverter *cnv;
147};
148
149// Test roundtrip UTF-16->encoding->UTF-16.
150class Roundtrip : public Command {
151protected:
152    Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
153public:
154    static UPerfFunction* get(const UtfPerformanceTest &testcase) {
155        Roundtrip * t = new Roundtrip(testcase);
156        if (U_SUCCESS(t->errorCode)){
157            return t;
158        } else {
159            delete t;
160            return NULL;
161        }
162    }
163    virtual void call(UErrorCode* pErrorCode){
164        const UChar *pIn, *pInLimit;
165        UChar *pOut, *pOutLimit;
166        char *pInter, *pInterLimit;
167        const char *p;
168        UBool flush;
169
170        ucnv_reset(cnv);
171        fromUCallbackCount=0;
172
173        pIn=input;
174        pInLimit=input+inputLength;
175
176        pOut=output;
177        pOutLimit=output+OUTPUT_CAPACITY;
178
179        pInterLimit=intermediate+testcase.chunkLength;
180
181        encodedLength=outputLength=0;
182        flush=FALSE;
183
184        do {
185            /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
186            pInter=intermediate;
187            ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
188            encodedLength+=(int32_t)(pInter-intermediate);
189
190            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
191                /* make sure that we convert once more to really flush */
192                *pErrorCode=U_ZERO_ERROR;
193            } else if(U_FAILURE(*pErrorCode)) {
194                return;
195            } else if(pIn==pInLimit) {
196                flush=TRUE;
197            }
198
199            /* convert the block [intermediate..pInter[ back to UTF-16 */
200            p=intermediate;
201            ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
202            if(U_FAILURE(*pErrorCode)) {
203                return;
204            }
205            /* intermediate must have been consumed (p==pInter) because of the converter semantics */
206        } while(!flush);
207
208        outputLength=pOut-output;
209        if(inputLength!=outputLength) {
210            fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
211            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
212        }
213    }
214};
215
216// Test one-way conversion UTF-16->encoding.
217class FromUnicode : public Command {
218protected:
219    FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
220public:
221    static UPerfFunction* get(const UtfPerformanceTest &testcase) {
222        FromUnicode * t = new FromUnicode(testcase);
223        if (U_SUCCESS(t->errorCode)){
224            return t;
225        } else {
226            delete t;
227            return NULL;
228        }
229    }
230    virtual void call(UErrorCode* pErrorCode){
231        const UChar *pIn, *pInLimit;
232        char *pInter, *pInterLimit;
233
234        ucnv_resetFromUnicode(cnv);
235        fromUCallbackCount=0;
236
237        pIn=input;
238        pInLimit=input+inputLength;
239
240        pInterLimit=intermediate+testcase.chunkLength;
241
242        encodedLength=0;
243
244        for(;;) {
245            pInter=intermediate;
246            ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
247            encodedLength+=(int32_t)(pInter-intermediate);
248
249            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
250                /* make sure that we convert once more to really flush */
251                *pErrorCode=U_ZERO_ERROR;
252            } else if(U_FAILURE(*pErrorCode)) {
253                return;
254            } else {
255                break;  // all done
256            }
257        }
258    }
259};
260
261// Test one-way conversion UTF-8->encoding.
262class FromUTF8 : public Command {
263protected:
264    FromUTF8(const UtfPerformanceTest &testcase)
265            : Command(testcase),
266              utf8Cnv(NULL),
267              input8(utf8), input8Length(utf8Length) {
268        utf8Cnv=ucnv_open("UTF-8", &errorCode);
269    }
270public:
271    static UPerfFunction* get(const UtfPerformanceTest &testcase) {
272        FromUTF8 * t = new FromUTF8(testcase);
273        if (U_SUCCESS(t->errorCode)){
274            return t;
275        } else {
276            delete t;
277            return NULL;
278        }
279    }
280    ~FromUTF8() {
281        ucnv_close(utf8Cnv);
282    }
283    virtual void call(UErrorCode* pErrorCode){
284        const char *pIn, *pInLimit;
285        char *pInter, *pInterLimit;
286        UChar *pivotSource, *pivotTarget, *pivotLimit;
287
288        ucnv_resetToUnicode(utf8Cnv);
289        ucnv_resetFromUnicode(cnv);
290        fromUCallbackCount=0;
291
292        pIn=input8;
293        pInLimit=input8+input8Length;
294
295        pInterLimit=intermediate+testcase.chunkLength;
296
297        pivotSource=pivotTarget=pivot;
298        pivotLimit=pivot+testcase.pivotLength;
299
300        encodedLength=0;
301
302        for(;;) {
303            pInter=intermediate;
304            ucnv_convertEx(cnv, utf8Cnv,
305                           &pInter, pInterLimit,
306                           &pIn, pInLimit,
307                           pivot, &pivotSource, &pivotTarget, pivotLimit,
308                           FALSE, TRUE, pErrorCode);
309            encodedLength+=(int32_t)(pInter-intermediate);
310
311            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
312                /* make sure that we convert once more to really flush */
313                *pErrorCode=U_ZERO_ERROR;
314            } else if(U_FAILURE(*pErrorCode)) {
315                return;
316            } else {
317                break;  // all done
318            }
319        }
320    }
321protected:
322    UConverter *utf8Cnv;
323    const char *input8;
324    int32_t input8Length;
325};
326
327UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
328    switch (index) {
329        case 0: name = "Roundtrip";     if (exec) return Roundtrip::get(*this); break;
330        case 1: name = "FromUnicode";   if (exec) return FromUnicode::get(*this); break;
331        case 2: name = "FromUTF8";      if (exec) return FromUTF8::get(*this); break;
332        default: name = ""; break;
333    }
334    return NULL;
335}
336
337int main(int argc, const char *argv[])
338{
339    // Default values for command-line options.
340    options[CHARSET].value = "UTF-8";
341    options[CHUNK_LENGTH].value = "4096";
342    options[PIVOT_LENGTH].value = "1024";
343
344    UErrorCode status = U_ZERO_ERROR;
345    UtfPerformanceTest test(argc, argv, status);
346
347	if (U_FAILURE(status)){
348        printf("The error is %s\n", u_errorName(status));
349        test.usage();
350        return status;
351    }
352
353    if (test.run() == FALSE){
354        fprintf(stderr, "FAILED: Tests could not be run please check the "
355			            "arguments.\n");
356        return -1;
357    }
358
359    if (fromUCallbackCount > 0) {
360        printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);
361    }
362
363    return 0;
364}
365