1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/******************************************************************** 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * COPYRIGHT: 354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * Copyright (C) 2001-2012 IBM, Inc. All Rights Reserved. 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ********************************************************************/ 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/******************************************************************************** 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* File ubrkperf.cpp 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Modification History: 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Name Description 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Vladimir Weinstein First Version, based on collperf 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************************* 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// This program tests break iterator performance 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Currently we test only ICU APIs with the future possibility of testing *nix & win32 APIs 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// (if any) 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// A text file is required as input. It must be in utf-8 or utf-16 format, 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// and include a byte order mark. Either LE or BE format is OK. 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst char gUsageString[] = 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "usage: ubrkperf options...\n" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-help Display this message.\n" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-file file_name utf-16/utf-8 format file.\n" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-locale name ICU locale to use. Default is en_US\n" 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n" 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n" 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-win Run test using Windows native services. (currently not working) (ICU is default)\n" 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-unix Run test using Unix word breaking services. (currently not working) \n" 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-mac Run test using MacOSX word breaking services.\n" 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-uselen Use API with string lengths. Default is null-terminated strings\n" 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-char Use character break iterator\n" 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-word Use word break iterator\n" 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-line Use line break iterator\n" 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-sentence Use sentence break iterator\n" 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n" 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n" 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru " under test at each call point. For measuring test overhead.\n" 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-terse Terse numbers-only output. Intended for use by scripts.\n" 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-dump Display stuff.\n" 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-capi Use C APIs instead of C++ APIs (currently not working)\n" 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-next Do the next test\n" 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-isBound Do the isBound test\n" 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ; 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h> 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <string.h> 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdlib.h> 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <math.h> 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <locale.h> 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <errno.h> 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <sys/stat.h> 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/utypes.h> 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ucol.h> 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ucoleitr.h> 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/uloc.h> 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ustring.h> 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ures.h> 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/uchar.h> 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ucnv.h> 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/utf8.h> 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/brkiter.h> 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 72103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#if U_PLATFORM_HAS_WIN32_API 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <windows.h> 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Stubs for Windows API functions when building on UNIXes. 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <sys/time.h> 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruunsigned long timeGetTime() { 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru struct timeval t; 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gettimeofday(&t, 0); 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unsigned long val = t.tv_sec * 1000; // Let it overflow. Who cares. 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru val += t.tv_usec / 1000; 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return val; 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define MAKELCID(a,b) 0 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Command line option variables 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// These global variables are set according to the options specified 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// on the command line by the user. 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruchar * opt_fName = 0; 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruchar * opt_locale = "en_US"; 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint opt_langid = 0; // Defaults to value corresponding to opt_locale. 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruchar * opt_rules = 0; 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_help = FALSE; 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint opt_time = 0; 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint opt_loopCount = 0; 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint opt_passesCount= 1; 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_terse = FALSE; 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_icu = TRUE; 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_win = FALSE; // Run with Windows native functions. 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_unix = FALSE; // Run with UNIX strcoll, strxfrm functions. 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_mac = FALSE; // Run with MacOSX word break services. 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_uselen = FALSE; 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_dump = FALSE; 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_char = FALSE; 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_word = FALSE; 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_line = FALSE; 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_sentence = FALSE; 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_capi = FALSE; 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_next = FALSE; 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_isBound = FALSE; 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Definitions for the command line options 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct OptSpec { 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *name; 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enum {FLAG, NUM, STRING} type; 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void *pVar; 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruOptSpec opts[] = { 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-file", OptSpec::STRING, &opt_fName}, 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-locale", OptSpec::STRING, &opt_locale}, 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-langid", OptSpec::NUM, &opt_langid}, 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-win", OptSpec::FLAG, &opt_win}, 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-unix", OptSpec::FLAG, &opt_unix}, 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-mac", OptSpec::FLAG, &opt_mac}, 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-uselen", OptSpec::FLAG, &opt_uselen}, 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-loop", OptSpec::NUM, &opt_loopCount}, 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-time", OptSpec::NUM, &opt_time}, 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-passes", OptSpec::NUM, &opt_passesCount}, 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-char", OptSpec::FLAG, &opt_char}, 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-word", OptSpec::FLAG, &opt_word}, 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-line", OptSpec::FLAG, &opt_line}, 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-sentence", OptSpec::FLAG, &opt_sentence}, 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-terse", OptSpec::FLAG, &opt_terse}, 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-dump", OptSpec::FLAG, &opt_dump}, 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-capi", OptSpec::FLAG, &opt_capi}, 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-next", OptSpec::FLAG, &opt_next}, 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-isBound", OptSpec::FLAG, &opt_isBound}, 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-help", OptSpec::FLAG, &opt_help}, 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-?", OptSpec::FLAG, &opt_help}, 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {0, OptSpec::FLAG, 0} 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//--------------------------------------------------------------------------- 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Global variables pointing to and describing the test file 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//--------------------------------------------------------------------------- 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//DWORD gWinLCID; 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruBreakIterator *brkit = NULL; 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar *text = NULL; 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t textSize = 0; 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 168103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#if U_PLATFORM_IS_DARWIN_BASED 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <ApplicationServices/ApplicationServices.h> 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum{ 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru kUCTextBreakAllMask = (kUCTextBreakClusterMask | kUCTextBreakWordMask | kUCTextBreakLineMask) 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }; 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUCTextBreakType breakTypes[4] = {kUCTextBreakCharMask, kUCTextBreakClusterMask, kUCTextBreakWordMask, kUCTextBreakLineMask}; 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTextBreakLocatorRef breakRef; 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUCTextBreakType macBreakType; 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid createMACBrkIt() { 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru OSStatus status = noErr; 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru LocaleRef lref; 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = LocaleRefFromLocaleString(opt_locale, &lref); 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = UCCreateTextBreakLocator(lref, 0, kUCTextBreakAllMask, (TextBreakLocatorRef*)&breakRef); 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(opt_char == TRUE) { 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru macBreakType = kUCTextBreakClusterMask; 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(opt_word == TRUE) { 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru macBreakType = kUCTextBreakWordMask; 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(opt_line == TRUE) { 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru macBreakType = kUCTextBreakLineMask; 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(opt_sentence == TRUE) { 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // error 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // brkit = BreakIterator::createSentenceInstance(opt_locale, status); 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // default is character iterator 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru macBreakType = kUCTextBreakClusterMask; 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid createICUBrkIt() { 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Set up an ICU break iterator 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(opt_char == TRUE) { 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru brkit = BreakIterator::createCharacterInstance(opt_locale, status); 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(opt_word == TRUE) { 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru brkit = BreakIterator::createWordInstance(opt_locale, status); 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(opt_line == TRUE) { 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru brkit = BreakIterator::createLineInstance(opt_locale, status); 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(opt_sentence == TRUE) { 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru brkit = BreakIterator::createSentenceInstance(opt_locale, status); 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // default is character iterator 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru brkit = BreakIterator::createCharacterInstance(opt_locale, status); 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) { 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale); 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) { 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale); 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//--------------------------------------------------------------------------- 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// ProcessOptions() Function to read the command line options. 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//--------------------------------------------------------------------------- 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool ProcessOptions(int argc, const char **argv, OptSpec opts[]) 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int i; 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int argNum; 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *pArgName; 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru OptSpec *pOpt; 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (argNum=1; argNum<argc; argNum++) { 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgName = argv[argNum]; 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (pOpt = opts; pOpt->name != 0; pOpt++) { 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (strcmp(pOpt->name, pArgName) == 0) { 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch (pOpt->type) { 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case OptSpec::FLAG: 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(UBool *)(pOpt->pVar) = TRUE; 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case OptSpec::STRING: 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru argNum ++; 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (argNum >= argc) { 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name); 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(const char **)(pOpt->pVar) = argv[argNum]; 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case OptSpec::NUM: 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru argNum ++; 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (argNum >= argc) { 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name); 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *endp; 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i = strtol(argv[argNum], &endp, 0); 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (endp == argv[argNum]) { 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "integer value expected for \"%s\" option.\n", pOpt->name); 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(int *)(pOpt->pVar) = i; 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (pOpt->name == 0) 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName); 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querureturn TRUE; 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid doForwardTest() { 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_terse == FALSE) { 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("Doing the forward test\n"); 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t noBreaks = 0; 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i = 0; 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unsigned long startTime = timeGetTime(); 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unsigned long elapsedTime = 0; 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(opt_icu) { 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru createICUBrkIt(); 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru brkit->setText(UnicodeString(text, textSize)); 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru brkit->first(); 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_terse == FALSE) { 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("Warmup\n"); 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int j; 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while((j = brkit->next()) != BreakIterator::DONE) { 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru noBreaks++; 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //fprintf(stderr, "%d ", j); 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_terse == FALSE) { 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("Measure\n"); 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru startTime = timeGetTime(); 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i = 0; i < opt_loopCount; i++) { 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru brkit->first(); 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(brkit->next() != BreakIterator::DONE) { 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru elapsedTime = timeGetTime()-startTime; 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(opt_mac) { 312103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#if U_PLATFORM_IS_DARWIN_BASED 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru createMACBrkIt(); 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UniChar* filePtr = text; 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru OSStatus status = noErr; 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UniCharCount startOffset = 0, breakOffset = 0, numUniChars = textSize; 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru startOffset = 0; 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //printf("\t---Search forward--\n"); 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (startOffset < numUniChars) 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars, 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru startOffset, &breakOffset); 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //require_action(status == noErr, EXIT, printf( "**UCFindTextBreak failed: startOffset %d, status %d\n", (int)startOffset, (int)status)); 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //require_action((breakOffset <= numUniChars),EXIT, printf("**UCFindTextBreak breakOffset too big: startOffset %d, breakOffset %d\n", (int)startOffset, (int)breakOffset)); 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Output break 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //printf("\t%d\n", (int)breakOffset); 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Increment counters 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru noBreaks++; 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru startOffset = breakOffset; 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru startTime = timeGetTime(); 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i = 0; i < opt_loopCount; i++) { 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru startOffset = 0; 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (startOffset < numUniChars) 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars, 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru startOffset, &breakOffset); 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Increment counters 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru startOffset = breakOffset; 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru elapsedTime = timeGetTime()-startTime; 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCDisposeTextBreakLocator(&breakRef); 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_terse == FALSE) { 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount)); 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize)); 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks)); 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("forward break iteration average loop time %d\n", loopTime); 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("number of code units %d average time per code unit %d\n", textSize, timePerCU); 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak); 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize); 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid doIsBoundTest() { 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t noBreaks = 0, hit = 0; 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i = 0, j = 0; 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unsigned long startTime = timeGetTime(); 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unsigned long elapsedTime = 0; 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru createICUBrkIt(); 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru brkit->setText(UnicodeString(text, textSize)); 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru brkit->first(); 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j = 0; j < textSize; j++) { 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(brkit->isBoundary(j)) { 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru noBreaks++; 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //fprintf(stderr, "%d ", j); 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(brkit->next() != BreakIterator::DONE) { 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru noBreaks++; 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru startTime = timeGetTime(); 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i = 0; i < opt_loopCount; i++) { 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j = 0; j < textSize; j++) { 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(brkit->isBoundary(j)) { 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru hit++; 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru elapsedTime = timeGetTime()-startTime; 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount)); 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_terse == FALSE) { 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize)); 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks)); 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("forward break iteration average loop time %d\n", loopTime); 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("number of code units %d average time per code unit %d\n", textSize, timePerCU); 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak); 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize); 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// UnixConvert -- Convert the lines of the file to the encoding for UNIX 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Since it appears that Unicode support is going in the general 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// direction of the use of UTF-8 locales, that is the approach 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// that is used here. 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid UnixConvert() { 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int line; 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cvrtr; // An ICU code page converter. 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cvrtr = ucnv_open("utf-8", &status); // we are just doing UTF-8 locales for now. 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "ICU Converter open failed.: %d\n", &status); 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(-1); 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // redo for unix 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (line=0; line < gNumFileLines; line++) { 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int sizeNeeded = ucnv_fromUChars(cvrtr, 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, // ptr to target buffer. 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, // length of target buffer. 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gFileLines[line].name, 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru -1, // source is null terminated 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &status); 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (status != U_BUFFER_OVERFLOW_ERROR && status != U_ZERO_ERROR) { 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Conversion from Unicode, something is wrong.\n"); 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(-1); 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_ZERO_ERROR; 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gFileLines[line].unixName = new char[sizeNeeded+1]; 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeNeeded = ucnv_fromUChars(cvrtr, 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gFileLines[line].unixName, // ptr to target buffer. 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeNeeded+1, // length of target buffer. 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gFileLines[line].name, 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru -1, // source is null terminated 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &status); 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "ICU Conversion Failed.: %d\n", status); 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(-1); 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gFileLines[line].unixName[sizeNeeded] = 0; 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }; 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close(cvrtr); 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// class UCharFile Class to hide all the gorp to read a file in 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// and produce a stream of UChars. 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UCharFile { 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCharFile(const char *fileName); 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~UCharFile(); 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar get(); 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool eof() {return fEof;}; 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool error() {return fError;}; 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t size() { return fFileSize; }; 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCharFile (const UCharFile &other) {}; // No copy constructor. 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCharFile & operator = (const UCharFile &other) {return *this;}; // No assignment op 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FILE *fFile; 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *fName; 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool fEof; 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool fError; 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar fPending2ndSurrogate; 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fFileSize; 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enum {UTF16LE, UTF16BE, UTF8} fEncoding; 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUCharFile::UCharFile(const char * fileName) { 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fEof = FALSE; 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fError = FALSE; 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fName = fileName; 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru struct stat buf; 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t result = stat(fileName, &buf); 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(result != 0) { 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Error getting info\n"); 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fFileSize = -1; 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fFileSize = buf.st_size; 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fFile = fopen(fName, "rb"); 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fPending2ndSurrogate = 0; 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fFile == NULL) { 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Can not open file \"%s\"\n", opt_fName); 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fError = TRUE; 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Look for the byte order mark at the start of the file. 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int BOMC1, BOMC2, BOMC3; 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru BOMC1 = fgetc(fFile); 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru BOMC2 = fgetc(fFile); 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (BOMC1 == 0xff && BOMC2 == 0xfe) { 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fEncoding = UTF16LE; } 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if (BOMC1 == 0xfe && BOMC2 == 0xff) { 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fEncoding = UTF16BE; } 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) { 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fEncoding = UTF8; } 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "collperf: file \"%s\" encoding must be UTF-8 or UTF-16, and " 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "must include a BOM.\n", fileName); 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fError = true; 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUCharFile::~UCharFile() { 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fclose(fFile); 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar UCharFile::get() { 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c; 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch (fEncoding) { 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UTF16LE: 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int cL, cH; 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cL = fgetc(fFile); 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cH = fgetc(fFile); 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = cL | (cH << 8); 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cH == EOF) { 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = 0; 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fEof = TRUE; 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UTF16BE: 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int cL, cH; 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cH = fgetc(fFile); 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cL = fgetc(fFile); 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = cL | (cH << 8); 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cL == EOF) { 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = 0; 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fEof = TRUE; 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UTF8: 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fPending2ndSurrogate != 0) { 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = fPending2ndSurrogate; 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fPending2ndSurrogate = 0; 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int ch = fgetc(fFile); // Note: c and ch are separate cause eof test doesn't work on UChar type. 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch == EOF) { 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = 0; 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fEof = TRUE; 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch <= 0x7f) { 582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // It's ascii. No further utf-8 conversion. 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = ch; 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Figure out the lenght of the char and read the rest of the bytes 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // into a temp array. 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int nBytes; 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch >= 0xF0) {nBytes=4;} 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if (ch >= 0xE0) {nBytes=3;} 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if (ch >= 0xC0) {nBytes=2;} 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "not likely utf-8 encoded file %s contains corrupt data at offset %d.\n", fName, ftell(fFile)); 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fError = TRUE; 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unsigned char bytes[10]; 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes[0] = (unsigned char)ch; 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int i; 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i=1; i<nBytes; i++) { 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes[i] = fgetc(fFile); 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (bytes[i] < 0x80 || bytes[i] >= 0xc0) { 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "utf-8 encoded file %s contains corrupt data at offset %d. Expected %d bytes, byte %d is invalid. First byte is %02X\n", fName, ftell(fFile), nBytes, i, ch); 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fError = TRUE; 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Convert the bytes from the temp array to a Unicode char. 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i = 0; 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t cp; 61454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius U8_NEXT_UNSAFE(bytes, i, cp); 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = (UChar)cp; 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cp >= 0x10000) { 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // The code point needs to be broken up into a utf-16 surrogate pair. 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Process first half this time through the main loop, and 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // remember the other half for the next time through. 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar utf16Buf[3]; 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i = 0; 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp); 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fPending2ndSurrogate = utf16Buf[1]; 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = utf16Buf[0]; 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }; 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Main -- process command line, read in and pre-process the test file, 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// call other functions to do the actual tests. 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint main(int argc, const char** argv) { 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ProcessOptions(argc, argv, opts) != TRUE || opt_help || opt_fName == 0) { 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf(gUsageString); 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit (1); 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Make sure that we've only got one API selected. 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_mac || opt_unix || opt_win) opt_icu = FALSE; 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_mac || opt_unix) opt_win = FALSE; 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_mac) opt_unix = FALSE; 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Set up a Windows LCID 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_langid != 0) { 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT); 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gWinLCID = uloc_getLCID(opt_locale); 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Set the UNIX locale 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_unix) { 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (setlocale(LC_ALL, opt_locale) == 0) { 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale); 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(-1); 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Read in the input file. 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // File assumed to be utf-16. 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Lines go onto heap buffers. Global index array to line starts is created. 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Lines themselves are null terminated. 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCharFile f(opt_fName); 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (f.error()) { 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(-1); 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fileSize = f.size(); 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const int STARTSIZE = 70000; 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t bufSize = 0; 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t charCount = 0; 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(fileSize != -1) { 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text = (UChar *)malloc(fileSize*sizeof(UChar)); 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bufSize = fileSize; 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text = (UChar *)malloc(STARTSIZE*sizeof(UChar)); 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bufSize = STARTSIZE; 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(text == NULL) { 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Allocating buffer failed\n"); 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(-1); 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Read the file, split into lines, and save in memory. 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Loop runs once per utf-16 value from the input file, 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // (The number of bytes read from file per loop iteration depends on external encoding.) 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (;;) { 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c = f.get(); 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(f.eof()) { 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (f.error()){ 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(-1); 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We now have a good UTF-16 value in c. 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text[charCount++] = c; 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(charCount == bufSize) { 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text = (UChar *)realloc(text, 2*bufSize*sizeof(UChar)); 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(text == NULL) { 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Reallocating buffer failed\n"); 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(-1); 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bufSize *= 2; 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_terse == FALSE) { 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("file \"%s\", %d charCount code units.\n", opt_fName, charCount); 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textSize = charCount; 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Dump file contents if requested. 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_dump) { 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // dump file, etc... possibly 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We've got the file read into memory. Go do something with it. 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i = 0; 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i = 0; i < opt_passesCount; i++) { 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(opt_loopCount != 0) { 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(opt_next) { 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru doForwardTest(); 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(opt_isBound) { 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru doIsBoundTest(); 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru doForwardTest(); 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(opt_time != 0) { 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(text != NULL) { 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru free(text); 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(brkit != NULL) { 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete brkit; 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 772