1/*
2**************************************************************************
3 *   Copyright (C) 2016 and later: Unicode, Inc. and others.
4 *   License & terms of use: http://www.unicode.org/copyright.html#License
5 *************************************************************************
6 *************************************************************************
7 *   Copyright (C) 2002-2014, International Business Machines
8 *   Corporation and others.  All Rights Reserved.
9 *************************************************************************
10 *   file name:  utfperf.cpp
11 *   encoding:   US-ASCII
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2005Nov17
16 *   created by: Raymond Yang
17 *
18 *   Ported from utfper.c created by Markus W. Scherer
19 *   Performance test program for Unicode converters
20 */
21
22#include <stdio.h>
23#include <stdlib.h>
24#include "unicode/uperf.h"
25#include "cmemory.h" // for UPRV_LENGTHOF
26#include "uoptions.h"
27
28/* definitions and text buffers */
29
30#define INPUT_CAPACITY (1024*1024)
31#define INTERMEDIATE_CAPACITY 4096
32#define INTERMEDIATE_SMALL_CAPACITY 20
33#define PIVOT_CAPACITY 1024
34#define OUTPUT_CAPACITY INPUT_CAPACITY
35
36static char utf8[INPUT_CAPACITY];
37static UChar pivot[INTERMEDIATE_CAPACITY];
38
39static UChar output[OUTPUT_CAPACITY];
40static char intermediate[OUTPUT_CAPACITY];
41
42static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;
43
44static int32_t fromUCallbackCount;
45
46// Command-line options specific to utfperf.
47// Options do not have abbreviations: Force readable command lines.
48// (Using U+0001 for abbreviation characters.)
49enum {
50    CHARSET,
51    CHUNK_LENGTH,
52    PIVOT_LENGTH,
53    UTFPERF_OPTIONS_COUNT
54};
55
56static UOption options[UTFPERF_OPTIONS_COUNT]={
57    UOPTION_DEF("charset",  '\x01', UOPT_REQUIRES_ARG),
58    UOPTION_DEF("chunk",    '\x01', UOPT_REQUIRES_ARG),
59    UOPTION_DEF("pivot",    '\x01', UOPT_REQUIRES_ARG)
60};
61
62static const char *const utfperf_usage =
63    "\t--charset   Charset for which to test performance, e.g. windows-1251.\n"
64    "\t            Default: UTF-8\n"
65    "\t--chunk     Length (in bytes) of charset output chunks. [4096]\n"
66    "\t--pivot     Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
67    "\t            [1024]\n";
68
69// Test object.
70class  UtfPerformanceTest : public UPerfTest{
71public:
72    UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
73            : UPerfTest(argc, argv, options, UPRV_LENGTHOF(options), utfperf_usage, status) {
74        if (U_SUCCESS(status)) {
75            charset = options[CHARSET].value;
76
77            chunkLength = atoi(options[CHUNK_LENGTH].value);
78            if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
79                fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
80                status = U_ILLEGAL_ARGUMENT_ERROR;
81            }
82
83            pivotLength = atoi(options[PIVOT_LENGTH].value);
84            if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
85                fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
86                status = U_ILLEGAL_ARGUMENT_ERROR;
87            }
88
89            int32_t inputLength;
90            UPerfTest::getBuffer(inputLength, status);
91            countInputCodePoints = u_countChar32(buffer, bufferLen);
92            u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
93        }
94    }
95
96    virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
97
98    const UChar *getBuffer() const { return buffer; }
99    int32_t getBufferLen() const { return bufferLen; }
100
101    const char *charset;
102    int32_t chunkLength, pivotLength;
103};
104
105U_CDECL_BEGIN
106// Custom callback for counting callback calls.
107static void U_CALLCONV
108fromUCallback(const void *context,
109              UConverterFromUnicodeArgs *fromUArgs,
110              const UChar *codeUnits,
111              int32_t length,
112              UChar32 codePoint,
113              UConverterCallbackReason reason,
114              UErrorCode *pErrorCode) {
115    if (reason <= UCNV_IRREGULAR) {
116        ++fromUCallbackCount;
117    }
118    UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
119}
120U_CDECL_END
121
122// Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
123class Command : public UPerfFunction {
124protected:
125    Command(const UtfPerformanceTest &testcase)
126            : testcase(testcase),
127              input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
128              errorCode(U_ZERO_ERROR) {
129        cnv=ucnv_open(testcase.charset, &errorCode);
130        if (U_FAILURE(errorCode)) {
131            fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
132        }
133        ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
134    }
135public:
136    virtual ~Command(){
137        if(U_SUCCESS(errorCode)) {
138            ucnv_close(cnv);
139        }
140    }
141    // virtual void call(UErrorCode* pErrorCode) { ... }
142    virtual long getOperationsPerIteration(){
143        return countInputCodePoints;
144    }
145
146    const UtfPerformanceTest &testcase;
147    const UChar *input;
148    int32_t inputLength;
149    UErrorCode errorCode;
150    UConverter *cnv;
151};
152
153// Test roundtrip UTF-16->encoding->UTF-16.
154class Roundtrip : public Command {
155protected:
156    Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
157public:
158    static UPerfFunction* get(const UtfPerformanceTest &testcase) {
159        Roundtrip * t = new Roundtrip(testcase);
160        if (U_SUCCESS(t->errorCode)){
161            return t;
162        } else {
163            delete t;
164            return NULL;
165        }
166    }
167    virtual void call(UErrorCode* pErrorCode){
168        const UChar *pIn, *pInLimit;
169        UChar *pOut, *pOutLimit;
170        char *pInter, *pInterLimit;
171        const char *p;
172        UBool flush;
173
174        ucnv_reset(cnv);
175        fromUCallbackCount=0;
176
177        pIn=input;
178        pInLimit=input+inputLength;
179
180        pOut=output;
181        pOutLimit=output+OUTPUT_CAPACITY;
182
183        pInterLimit=intermediate+testcase.chunkLength;
184
185        encodedLength=outputLength=0;
186        flush=FALSE;
187
188        do {
189            /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
190            pInter=intermediate;
191            ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
192            encodedLength+=(int32_t)(pInter-intermediate);
193
194            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
195                /* make sure that we convert once more to really flush */
196                *pErrorCode=U_ZERO_ERROR;
197            } else if(U_FAILURE(*pErrorCode)) {
198                return;
199            } else if(pIn==pInLimit) {
200                flush=TRUE;
201            }
202
203            /* convert the block [intermediate..pInter[ back to UTF-16 */
204            p=intermediate;
205            ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
206            if(U_FAILURE(*pErrorCode)) {
207                return;
208            }
209            /* intermediate must have been consumed (p==pInter) because of the converter semantics */
210        } while(!flush);
211
212        outputLength=pOut-output;
213        if(inputLength!=outputLength) {
214            fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
215            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
216        }
217    }
218};
219
220// Test one-way conversion UTF-16->encoding.
221class FromUnicode : public Command {
222protected:
223    FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
224public:
225    static UPerfFunction* get(const UtfPerformanceTest &testcase) {
226        FromUnicode * t = new FromUnicode(testcase);
227        if (U_SUCCESS(t->errorCode)){
228            return t;
229        } else {
230            delete t;
231            return NULL;
232        }
233    }
234    virtual void call(UErrorCode* pErrorCode){
235        const UChar *pIn, *pInLimit;
236        char *pInter, *pInterLimit;
237
238        ucnv_resetFromUnicode(cnv);
239        fromUCallbackCount=0;
240
241        pIn=input;
242        pInLimit=input+inputLength;
243
244        pInterLimit=intermediate+testcase.chunkLength;
245
246        encodedLength=0;
247
248        for(;;) {
249            pInter=intermediate;
250            ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
251            encodedLength+=(int32_t)(pInter-intermediate);
252
253            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
254                /* make sure that we convert once more to really flush */
255                *pErrorCode=U_ZERO_ERROR;
256            } else if(U_FAILURE(*pErrorCode)) {
257                return;
258            } else {
259                break;  // all done
260            }
261        }
262    }
263};
264
265// Test one-way conversion UTF-8->encoding.
266class FromUTF8 : public Command {
267protected:
268    FromUTF8(const UtfPerformanceTest &testcase)
269            : Command(testcase),
270              utf8Cnv(NULL),
271              input8(utf8), input8Length(utf8Length) {
272        utf8Cnv=ucnv_open("UTF-8", &errorCode);
273    }
274public:
275    static UPerfFunction* get(const UtfPerformanceTest &testcase) {
276        FromUTF8 * t = new FromUTF8(testcase);
277        if (U_SUCCESS(t->errorCode)){
278            return t;
279        } else {
280            delete t;
281            return NULL;
282        }
283    }
284    ~FromUTF8() {
285        ucnv_close(utf8Cnv);
286    }
287    virtual void call(UErrorCode* pErrorCode){
288        const char *pIn, *pInLimit;
289        char *pInter, *pInterLimit;
290        UChar *pivotSource, *pivotTarget, *pivotLimit;
291
292        ucnv_resetToUnicode(utf8Cnv);
293        ucnv_resetFromUnicode(cnv);
294        fromUCallbackCount=0;
295
296        pIn=input8;
297        pInLimit=input8+input8Length;
298
299        pInterLimit=intermediate+testcase.chunkLength;
300
301        pivotSource=pivotTarget=pivot;
302        pivotLimit=pivot+testcase.pivotLength;
303
304        encodedLength=0;
305
306        for(;;) {
307            pInter=intermediate;
308            ucnv_convertEx(cnv, utf8Cnv,
309                           &pInter, pInterLimit,
310                           &pIn, pInLimit,
311                           pivot, &pivotSource, &pivotTarget, pivotLimit,
312                           FALSE, TRUE, pErrorCode);
313            encodedLength+=(int32_t)(pInter-intermediate);
314
315            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
316                /* make sure that we convert once more to really flush */
317                *pErrorCode=U_ZERO_ERROR;
318            } else if(U_FAILURE(*pErrorCode)) {
319                return;
320            } else {
321                break;  // all done
322            }
323        }
324    }
325protected:
326    UConverter *utf8Cnv;
327    const char *input8;
328    int32_t input8Length;
329};
330
331UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
332    switch (index) {
333        case 0: name = "Roundtrip";     if (exec) return Roundtrip::get(*this); break;
334        case 1: name = "FromUnicode";   if (exec) return FromUnicode::get(*this); break;
335        case 2: name = "FromUTF8";      if (exec) return FromUTF8::get(*this); break;
336        default: name = ""; break;
337    }
338    return NULL;
339}
340
341int main(int argc, const char *argv[])
342{
343    // Default values for command-line options.
344    options[CHARSET].value = "UTF-8";
345    options[CHUNK_LENGTH].value = "4096";
346    options[PIVOT_LENGTH].value = "1024";
347
348    UErrorCode status = U_ZERO_ERROR;
349    UtfPerformanceTest test(argc, argv, status);
350
351	if (U_FAILURE(status)){
352        printf("The error is %s\n", u_errorName(status));
353        test.usage();
354        return status;
355    }
356
357    if (test.run() == FALSE){
358        fprintf(stderr, "FAILED: Tests could not be run please check the "
359			            "arguments.\n");
360        return -1;
361    }
362
363    if (fromUCallbackCount > 0) {
364        printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);
365    }
366
367    return 0;
368}
369