1/* 2 ********************************************************************** 3 * Copyright (C) 2002-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * file name: utfperf.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2005Nov17 12 * created by: Raymond Yang 13 * 14 * Ported from utfper.c created by Markus W. Scherer 15 * Performance test program for Unicode converters 16 */ 17 18#include <stdio.h> 19#include <stdlib.h> 20#include "unicode/uperf.h" 21#include "cmemory.h" // for UPRV_LENGTHOF 22#include "uoptions.h" 23 24/* definitions and text buffers */ 25 26#define INPUT_CAPACITY (1024*1024) 27#define INTERMEDIATE_CAPACITY 4096 28#define INTERMEDIATE_SMALL_CAPACITY 20 29#define PIVOT_CAPACITY 1024 30#define OUTPUT_CAPACITY INPUT_CAPACITY 31 32static char utf8[INPUT_CAPACITY]; 33static UChar pivot[INTERMEDIATE_CAPACITY]; 34 35static UChar output[OUTPUT_CAPACITY]; 36static char intermediate[OUTPUT_CAPACITY]; 37 38static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints; 39 40static int32_t fromUCallbackCount; 41 42// Command-line options specific to utfperf. 43// Options do not have abbreviations: Force readable command lines. 44// (Using U+0001 for abbreviation characters.) 45enum { 46 CHARSET, 47 CHUNK_LENGTH, 48 PIVOT_LENGTH, 49 UTFPERF_OPTIONS_COUNT 50}; 51 52static UOption options[UTFPERF_OPTIONS_COUNT]={ 53 UOPTION_DEF("charset", '\x01', UOPT_REQUIRES_ARG), 54 UOPTION_DEF("chunk", '\x01', UOPT_REQUIRES_ARG), 55 UOPTION_DEF("pivot", '\x01', UOPT_REQUIRES_ARG) 56}; 57 58static const char *const utfperf_usage = 59 "\t--charset Charset for which to test performance, e.g. windows-1251.\n" 60 "\t Default: UTF-8\n" 61 "\t--chunk Length (in bytes) of charset output chunks. [4096]\n" 62 "\t--pivot Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n" 63 "\t [1024]\n"; 64 65// Test object. 66class UtfPerformanceTest : public UPerfTest{ 67public: 68 UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status) 69 : UPerfTest(argc, argv, options, UPRV_LENGTHOF(options), utfperf_usage, status) { 70 if (U_SUCCESS(status)) { 71 charset = options[CHARSET].value; 72 73 chunkLength = atoi(options[CHUNK_LENGTH].value); 74 if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) { 75 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY); 76 status = U_ILLEGAL_ARGUMENT_ERROR; 77 } 78 79 pivotLength = atoi(options[PIVOT_LENGTH].value); 80 if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) { 81 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY); 82 status = U_ILLEGAL_ARGUMENT_ERROR; 83 } 84 85 int32_t inputLength; 86 UPerfTest::getBuffer(inputLength, status); 87 countInputCodePoints = u_countChar32(buffer, bufferLen); 88 u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status); 89 } 90 } 91 92 virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL); 93 94 const UChar *getBuffer() const { return buffer; } 95 int32_t getBufferLen() const { return bufferLen; } 96 97 const char *charset; 98 int32_t chunkLength, pivotLength; 99}; 100 101U_CDECL_BEGIN 102// Custom callback for counting callback calls. 103static void U_CALLCONV 104fromUCallback(const void *context, 105 UConverterFromUnicodeArgs *fromUArgs, 106 const UChar *codeUnits, 107 int32_t length, 108 UChar32 codePoint, 109 UConverterCallbackReason reason, 110 UErrorCode *pErrorCode) { 111 if (reason <= UCNV_IRREGULAR) { 112 ++fromUCallbackCount; 113 } 114 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode); 115} 116U_CDECL_END 117 118// Base class for Roundtrip, FromUnicode and FromUTF8 with common setup. 119class Command : public UPerfFunction { 120protected: 121 Command(const UtfPerformanceTest &testcase) 122 : testcase(testcase), 123 input(testcase.getBuffer()), inputLength(testcase.getBufferLen()), 124 errorCode(U_ZERO_ERROR) { 125 cnv=ucnv_open(testcase.charset, &errorCode); 126 if (U_FAILURE(errorCode)) { 127 fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode)); 128 } 129 ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode); 130 } 131public: 132 virtual ~Command(){ 133 if(U_SUCCESS(errorCode)) { 134 ucnv_close(cnv); 135 } 136 } 137 // virtual void call(UErrorCode* pErrorCode) { ... } 138 virtual long getOperationsPerIteration(){ 139 return countInputCodePoints; 140 } 141 142 const UtfPerformanceTest &testcase; 143 const UChar *input; 144 int32_t inputLength; 145 UErrorCode errorCode; 146 UConverter *cnv; 147}; 148 149// Test roundtrip UTF-16->encoding->UTF-16. 150class Roundtrip : public Command { 151protected: 152 Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {} 153public: 154 static UPerfFunction* get(const UtfPerformanceTest &testcase) { 155 Roundtrip * t = new Roundtrip(testcase); 156 if (U_SUCCESS(t->errorCode)){ 157 return t; 158 } else { 159 delete t; 160 return NULL; 161 } 162 } 163 virtual void call(UErrorCode* pErrorCode){ 164 const UChar *pIn, *pInLimit; 165 UChar *pOut, *pOutLimit; 166 char *pInter, *pInterLimit; 167 const char *p; 168 UBool flush; 169 170 ucnv_reset(cnv); 171 fromUCallbackCount=0; 172 173 pIn=input; 174 pInLimit=input+inputLength; 175 176 pOut=output; 177 pOutLimit=output+OUTPUT_CAPACITY; 178 179 pInterLimit=intermediate+testcase.chunkLength; 180 181 encodedLength=outputLength=0; 182 flush=FALSE; 183 184 do { 185 /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */ 186 pInter=intermediate; 187 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode); 188 encodedLength+=(int32_t)(pInter-intermediate); 189 190 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 191 /* make sure that we convert once more to really flush */ 192 *pErrorCode=U_ZERO_ERROR; 193 } else if(U_FAILURE(*pErrorCode)) { 194 return; 195 } else if(pIn==pInLimit) { 196 flush=TRUE; 197 } 198 199 /* convert the block [intermediate..pInter[ back to UTF-16 */ 200 p=intermediate; 201 ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode); 202 if(U_FAILURE(*pErrorCode)) { 203 return; 204 } 205 /* intermediate must have been consumed (p==pInter) because of the converter semantics */ 206 } while(!flush); 207 208 outputLength=pOut-output; 209 if(inputLength!=outputLength) { 210 fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength); 211 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; 212 } 213 } 214}; 215 216// Test one-way conversion UTF-16->encoding. 217class FromUnicode : public Command { 218protected: 219 FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {} 220public: 221 static UPerfFunction* get(const UtfPerformanceTest &testcase) { 222 FromUnicode * t = new FromUnicode(testcase); 223 if (U_SUCCESS(t->errorCode)){ 224 return t; 225 } else { 226 delete t; 227 return NULL; 228 } 229 } 230 virtual void call(UErrorCode* pErrorCode){ 231 const UChar *pIn, *pInLimit; 232 char *pInter, *pInterLimit; 233 234 ucnv_resetFromUnicode(cnv); 235 fromUCallbackCount=0; 236 237 pIn=input; 238 pInLimit=input+inputLength; 239 240 pInterLimit=intermediate+testcase.chunkLength; 241 242 encodedLength=0; 243 244 for(;;) { 245 pInter=intermediate; 246 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode); 247 encodedLength+=(int32_t)(pInter-intermediate); 248 249 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 250 /* make sure that we convert once more to really flush */ 251 *pErrorCode=U_ZERO_ERROR; 252 } else if(U_FAILURE(*pErrorCode)) { 253 return; 254 } else { 255 break; // all done 256 } 257 } 258 } 259}; 260 261// Test one-way conversion UTF-8->encoding. 262class FromUTF8 : public Command { 263protected: 264 FromUTF8(const UtfPerformanceTest &testcase) 265 : Command(testcase), 266 utf8Cnv(NULL), 267 input8(utf8), input8Length(utf8Length) { 268 utf8Cnv=ucnv_open("UTF-8", &errorCode); 269 } 270public: 271 static UPerfFunction* get(const UtfPerformanceTest &testcase) { 272 FromUTF8 * t = new FromUTF8(testcase); 273 if (U_SUCCESS(t->errorCode)){ 274 return t; 275 } else { 276 delete t; 277 return NULL; 278 } 279 } 280 ~FromUTF8() { 281 ucnv_close(utf8Cnv); 282 } 283 virtual void call(UErrorCode* pErrorCode){ 284 const char *pIn, *pInLimit; 285 char *pInter, *pInterLimit; 286 UChar *pivotSource, *pivotTarget, *pivotLimit; 287 288 ucnv_resetToUnicode(utf8Cnv); 289 ucnv_resetFromUnicode(cnv); 290 fromUCallbackCount=0; 291 292 pIn=input8; 293 pInLimit=input8+input8Length; 294 295 pInterLimit=intermediate+testcase.chunkLength; 296 297 pivotSource=pivotTarget=pivot; 298 pivotLimit=pivot+testcase.pivotLength; 299 300 encodedLength=0; 301 302 for(;;) { 303 pInter=intermediate; 304 ucnv_convertEx(cnv, utf8Cnv, 305 &pInter, pInterLimit, 306 &pIn, pInLimit, 307 pivot, &pivotSource, &pivotTarget, pivotLimit, 308 FALSE, TRUE, pErrorCode); 309 encodedLength+=(int32_t)(pInter-intermediate); 310 311 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 312 /* make sure that we convert once more to really flush */ 313 *pErrorCode=U_ZERO_ERROR; 314 } else if(U_FAILURE(*pErrorCode)) { 315 return; 316 } else { 317 break; // all done 318 } 319 } 320 } 321protected: 322 UConverter *utf8Cnv; 323 const char *input8; 324 int32_t input8Length; 325}; 326 327UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) { 328 switch (index) { 329 case 0: name = "Roundtrip"; if (exec) return Roundtrip::get(*this); break; 330 case 1: name = "FromUnicode"; if (exec) return FromUnicode::get(*this); break; 331 case 2: name = "FromUTF8"; if (exec) return FromUTF8::get(*this); break; 332 default: name = ""; break; 333 } 334 return NULL; 335} 336 337int main(int argc, const char *argv[]) 338{ 339 // Default values for command-line options. 340 options[CHARSET].value = "UTF-8"; 341 options[CHUNK_LENGTH].value = "4096"; 342 options[PIVOT_LENGTH].value = "1024"; 343 344 UErrorCode status = U_ZERO_ERROR; 345 UtfPerformanceTest test(argc, argv, status); 346 347 if (U_FAILURE(status)){ 348 printf("The error is %s\n", u_errorName(status)); 349 test.usage(); 350 return status; 351 } 352 353 if (test.run() == FALSE){ 354 fprintf(stderr, "FAILED: Tests could not be run please check the " 355 "arguments.\n"); 356 return -1; 357 } 358 359 if (fromUCallbackCount > 0) { 360 printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount); 361 } 362 363 return 0; 364} 365