1c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru/******************************************************************** 2c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * COPYRIGHT: 350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Copyright (C) 2001-2010 IBM, Inc. All Rights Reserved. 4c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 5c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ********************************************************************/ 6c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru/******************************************************************************** 7c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru* 8c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru* File CALLCOLL.C 9c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru* 10c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru* Modification History: 11c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru* Name Description 12c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru* Andy Heninger First Version 13c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru* 14c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru********************************************************************************* 15c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru*/ 16c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 17c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 18c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// This program tests string collation and sort key generation performance. 19c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Three APIs can be teste: ICU C , Unix strcoll, strxfrm and Windows LCMapString 20c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// A file of names is required as input, one per line. It must be in utf-8 or utf-16 format, 21c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// and include a byte order mark. Either LE or BE format is OK. 22c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 23c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 24c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruconst char gUsageString[] = 25c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "usage: collperf options...\n" 26c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-help Display this message.\n" 27c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-file file_name utf-16 format file of names.\n" 28c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-locale name ICU locale to use. Default is en_US\n" 29c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-rules file_name Collation rules file (overrides locale)\n" 30c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n" 31c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n" 32c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-win Run test using Windows native services. (ICU is default)\n" 33c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-unix Run test using Unix strxfrm, strcoll services.\n" 34c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-uselen Use API with string lengths. Default is null-terminated strings\n" 35c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-usekeys Run tests using sortkeys rather than strcoll\n" 36c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-strcmp Run tests using u_strcmp rather than strcoll\n" 37c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-strcmpCPO Run tests using u_strcmpCodePointOrder rather than strcoll\n" 38c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n" 39c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n" 40c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru " under test at each call point. For measuring test overhead.\n" 41c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-terse Terse numbers-only output. Intended for use by scripts.\n" 42c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-french French accent ordering\n" 43c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-frenchoff No French accent ordering (for use with French locales.)\n" 44c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-norm Normalizing mode on\n" 45c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-shifted Shifted mode\n" 46c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-lower Lower case first\n" 47c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-upper Upper case first\n" 48c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-case Enable separate case level\n" 49c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-level n Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n" 50c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-keyhist Produce a table sort key size vs. string length\n" 51c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-binsearch Binary Search timing test\n" 52c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-keygen Sort Key Generation timing test\n" 53c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-qsort Quicksort timing test\n" 54c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-iter Iteration Performance Test\n" 55c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "-dump Display strings, sort keys and CEs.\n" 56c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ; 57c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 58c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 59c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 60c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <stdio.h> 61c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <string.h> 62c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <stdlib.h> 63c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <math.h> 64c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <locale.h> 65c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <errno.h> 66c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 67c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <unicode/utypes.h> 68c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <unicode/ucol.h> 69c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <unicode/ucoleitr.h> 70c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <unicode/uloc.h> 71c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <unicode/ustring.h> 72c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <unicode/ures.h> 73c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <unicode/uchar.h> 74c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <unicode/ucnv.h> 75c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <unicode/utf8.h> 76c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 77c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#ifdef WIN32 78c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <windows.h> 79c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#else 80c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 81c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Stubs for Windows API functions when building on UNIXes. 82c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 83c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querutypedef int DWORD; 84b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruinline int CompareStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;} 85c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <sys/time.h> 86c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruunsigned long timeGetTime() { 87c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru struct timeval t; 88c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gettimeofday(&t, 0); 89c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned long val = t.tv_sec * 1000; // Let it overflow. Who cares. 90c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru val += t.tv_usec / 1000; 91c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return val; 92b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 93b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruinline int LCMapStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;} 94c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruconst int LCMAP_SORTKEY = 0; 95c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define MAKELCID(a,b) 0 96c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruconst int SORT_DEFAULT = 0; 97c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 98c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 99c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Command line option variables 103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// These global variables are set according to the options specified 104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// on the command line by the user. 105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruchar * opt_fName = 0; 106b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruconst char * opt_locale = "en_US"; 107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint opt_langid = 0; // Defaults to value corresponding to opt_locale. 108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruchar * opt_rules = 0; 109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_help = FALSE; 110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint opt_loopCount = 1; 111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint opt_iLoopCount = 1; 112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_terse = FALSE; 113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_qsort = FALSE; 114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_binsearch = FALSE; 115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_icu = TRUE; 116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_win = FALSE; // Run with Windows native functions. 117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_unix = FALSE; // Run with UNIX strcoll, strxfrm functions. 118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_uselen = FALSE; 119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_usekeys = FALSE; 120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_strcmp = FALSE; 121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_strcmpCPO = FALSE; 122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_norm = FALSE; 123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_keygen = FALSE; 124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_french = FALSE; 125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_frenchoff = FALSE; 126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_shifted = FALSE; 127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_lower = FALSE; 128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_upper = FALSE; 129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_case = FALSE; 130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint opt_level = 0; 131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_keyhist = FALSE; 132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_itertest = FALSE; 133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool opt_dump = FALSE; 134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Definitions for the command line options 139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustruct OptSpec { 141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const char *name; 142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru enum {FLAG, NUM, STRING} type; 143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru void *pVar; 144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}; 145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruOptSpec opts[] = { 147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-file", OptSpec::STRING, &opt_fName}, 148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-locale", OptSpec::STRING, &opt_locale}, 149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-langid", OptSpec::NUM, &opt_langid}, 150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-rules", OptSpec::STRING, &opt_rules}, 151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-qsort", OptSpec::FLAG, &opt_qsort}, 152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-binsearch", OptSpec::FLAG, &opt_binsearch}, 153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-iter", OptSpec::FLAG, &opt_itertest}, 154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-win", OptSpec::FLAG, &opt_win}, 155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-unix", OptSpec::FLAG, &opt_unix}, 156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-uselen", OptSpec::FLAG, &opt_uselen}, 157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-usekeys", OptSpec::FLAG, &opt_usekeys}, 158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-strcmp", OptSpec::FLAG, &opt_strcmp}, 159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-strcmpCPO", OptSpec::FLAG, &opt_strcmpCPO}, 160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-norm", OptSpec::FLAG, &opt_norm}, 161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-french", OptSpec::FLAG, &opt_french}, 162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-frenchoff", OptSpec::FLAG, &opt_frenchoff}, 163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-shifted", OptSpec::FLAG, &opt_shifted}, 164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-lower", OptSpec::FLAG, &opt_lower}, 165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-upper", OptSpec::FLAG, &opt_upper}, 166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-case", OptSpec::FLAG, &opt_case}, 167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-level", OptSpec::NUM, &opt_level}, 168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-keyhist", OptSpec::FLAG, &opt_keyhist}, 169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-keygen", OptSpec::FLAG, &opt_keygen}, 170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-loop", OptSpec::NUM, &opt_loopCount}, 171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-iloop", OptSpec::NUM, &opt_iLoopCount}, 172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-terse", OptSpec::FLAG, &opt_terse}, 173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-dump", OptSpec::FLAG, &opt_dump}, 174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-help", OptSpec::FLAG, &opt_help}, 175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {"-?", OptSpec::FLAG, &opt_help}, 176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru {0, OptSpec::FLAG, 0} 177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}; 178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------- 181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Global variables pointing to and describing the test file 183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------- 185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// struct Line 188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Each line from the source file (containing a name, presumably) gets 190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// one of these structs. 191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustruct Line { 193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *name; 194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int len; 195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char *winSortKey; 196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char *icuSortKey; 197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char *unixSortKey; 198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char *unixName; 199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}; 200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruLine *gFileLines; // Ptr to array of Line structs, one per line in the file. 204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint gNumFileLines; 205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUCollator *gCol; 206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruDWORD gWinLCID; 207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruLine **gSortedLines; 209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruLine **gRandomLines; 210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint gCount; 211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------- 215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// ProcessOptions() Function to read the command line options. 217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------- 219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool ProcessOptions(int argc, const char **argv, OptSpec opts[]) 220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int i; 222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int argNum; 223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const char *pArgName; 224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru OptSpec *pOpt; 225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (argNum=1; argNum<argc; argNum++) { 227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru pArgName = argv[argNum]; 228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (pOpt = opts; pOpt->name != 0; pOpt++) { 229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strcmp(pOpt->name, pArgName) == 0) { 230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru switch (pOpt->type) { 231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case OptSpec::FLAG: 232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(UBool *)(pOpt->pVar) = TRUE; 233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case OptSpec::STRING: 235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru argNum ++; 236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (argNum >= argc) { 237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name); 238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(const char **)(pOpt->pVar) = argv[argNum]; 241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case OptSpec::NUM: 243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru argNum ++; 244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (argNum >= argc) { 245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name); 246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char *endp; 249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru i = strtol(argv[argNum], &endp, 0); 250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (endp == argv[argNum]) { 251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "integer value expected for \"%s\" option.\n", pOpt->name); 252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(int *)(pOpt->pVar) = i; 255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (pOpt->name == 0) 260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName); 262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querureturn TRUE; 266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Comparison functions for use by qsort. 271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Six flavors, ICU or Windows, SortKey or String Compare, Strings with length 273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// or null terminated. 274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint ICUstrcmpK(const void *a, const void *b) { 277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int t = strcmp((*(Line **)a)->icuSortKey, (*(Line **)b)->icuSortKey); 279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return t; 280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint ICUstrcmpL(const void *a, const void *b) { 284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollationResult t; 286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t = ucol_strcoll(gCol, (*(Line **)a)->name, (*(Line **)a)->len, (*(Line **)b)->name, (*(Line **)b)->len); 287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (t == UCOL_LESS) return -1; 288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (t == UCOL_GREATER) return +1; 289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint ICUstrcmp(const void *a, const void *b) { 294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollationResult t; 296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t = ucol_strcoll(gCol, (*(Line **)a)->name, -1, (*(Line **)b)->name, -1); 297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (t == UCOL_LESS) return -1; 298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (t == UCOL_GREATER) return +1; 299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint Winstrcmp(const void *a, const void *b) { 304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int t; 306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t = CompareStringW(gWinLCID, 0, (*(Line **)a)->name, -1, (*(Line **)b)->name, -1); 307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return t-2; 308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint UNIXstrcmp(const void *a, const void *b) { 312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int t; 314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t = strcoll((*(Line **)a)->unixName, (*(Line **)b)->unixName); 315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return t; 316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint WinstrcmpL(const void *a, const void *b) { 320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int t; 322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t = CompareStringW(gWinLCID, 0, (*(Line **)a)->name, (*(Line **)a)->len, (*(Line **)b)->name, (*(Line **)b)->len); 323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return t-2; 324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint WinstrcmpK(const void *a, const void *b) { 328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int t = strcmp((*(Line **)a)->winSortKey, (*(Line **)b)->winSortKey); 330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return t; 331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Function for sorting the names (lines) into a random order. 337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Order is based on a hash of the ICU Sort key for the lines 338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// The randomized order is used as input for the sorting timing tests. 339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint ICURandomCmp(const void *a, const void *b) { 342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char *ask = (*(Line **)a)->icuSortKey; 343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char *bsk = (*(Line **)b)->icuSortKey; 344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int aVal = 0; 345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int bVal = 0; 346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int retVal; 347c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (*ask != 0) { 348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru aVal += aVal*37 + *ask++; 349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (*bsk != 0) { 351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru bVal += bVal*37 + *bsk++; 352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru retVal = -1; 354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (aVal == bVal) { 355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru retVal = 0; 356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (aVal > bVal) { 358c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru retVal = 1; 359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return retVal; 361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// doKeyGen() Key Generation Timing Test 366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid doKeyGen() 369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int line; 371b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int loops = 0; 372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int iLoop; 373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int t; 374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int len=-1; 375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Adjust loop count to compensate for file size. Should be order n 377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru double dLoopCount = double(opt_loopCount) * (1000. / double(gNumFileLines)); 378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int adj_loopCount = int(dLoopCount); 379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (adj_loopCount < 1) adj_loopCount = 1; 380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned long startTime = timeGetTime(); 383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_win) { 385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (loops=0; loops<adj_loopCount; loops++) { 386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (line=0; line < gNumFileLines; line++) { 387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_uselen) { 388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru len = gFileLines[line].len; 389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t=LCMapStringW(gWinLCID, LCMAP_SORTKEY, 392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gFileLines[line].name, len, 393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (unsigned short *)gFileLines[line].winSortKey, 5000); // TODO something with length. 394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (opt_icu) 399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (loops=0; loops<adj_loopCount; loops++) { 401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (line=0; line < gNumFileLines; line++) { 402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_uselen) { 403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru len = gFileLines[line].len; 404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t = ucol_getSortKey(gCol, gFileLines[line].name, len, (unsigned char *)gFileLines[line].icuSortKey, 5000); 407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (opt_unix) 412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 413c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (loops=0; loops<adj_loopCount; loops++) { 414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (line=0; line < gNumFileLines; line++) { 415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t = strxfrm(gFileLines[line].unixSortKey, gFileLines[line].unixName, 5000); 417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned long elapsedTime = timeGetTime() - startTime; 423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int ns = (int)(float(1000000) * (float)elapsedTime / (float)(adj_loopCount*gNumFileLines)); 424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_terse == FALSE) { 426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("Sort Key Generation: total # of keys = %d\n", loops*gNumFileLines); 427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("Sort Key Generation: time per key = %d ns\n", ns); 428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("%d, ", ns); 431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int totalKeyLen = 0; 434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int totalChars = 0; 435c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (line=0; line<gNumFileLines; line++) { 436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru totalChars += u_strlen(gFileLines[line].name); 437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_win) { 438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru totalKeyLen += strlen(gFileLines[line].winSortKey); 439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (opt_icu) { 441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru totalKeyLen += strlen(gFileLines[line].icuSortKey); 442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (opt_unix) { 444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru totalKeyLen += strlen(gFileLines[line].unixSortKey); 445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_terse == FALSE) { 449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("Key Length / character = %f\n", (float)totalKeyLen / (float)totalChars); 450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 451c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("%f, ", (float)totalKeyLen / (float)totalChars); 452c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// doBinarySearch() Binary Search timing test. Each name from the list 460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// is looked up in the full sorted list of names. 461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid doBinarySearch() 464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount = 0; 467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int line; 468b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int loops = 0; 469b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int iLoop = 0; 470b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru unsigned long elapsedTime = 0; 471c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 472c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Adjust loop count to compensate for file size. Should be order n (lookups) * log n (compares/lookup) 473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Accurate timings do not depend on this being perfect. The correction is just to try to 474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // get total running times of about the right order, so the that user doesn't need to 475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // manually adjust the loop count for every different file size. 476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru double dLoopCount = double(opt_loopCount) * 3000. / (log10(gNumFileLines) * double(gNumFileLines)); 477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_usekeys) dLoopCount *= 5; 478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int adj_loopCount = int(dLoopCount); 479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (adj_loopCount < 1) adj_loopCount = 1; 480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { // not really a loop, just allows "break" to work, to simplify 483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // inadvertantly running more than one test through here. 484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_strcmp || opt_strcmpCPO) 485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned long startTime = timeGetTime(); 487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru typedef int32_t (U_EXPORT2 *PF)(const UChar *, const UChar *); 488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru PF pf = u_strcmp; 489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_strcmpCPO) {pf = u_strcmpCodePointOrder;} 490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //if (opt_strcmp && opt_win) {pf = (PF)wcscmp;} // Damn the difference between int32_t and int 491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // which forces the use of a cast here. 492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 493b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int r = 0; 494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (loops=0; loops<adj_loopCount; loops++) { 495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (line=0; line < gNumFileLines; line++) { 497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int hi = gNumFileLines-1; 498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int lo = 0; 499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int guess = -1; 500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int newGuess = (hi + lo) / 2; 502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (newGuess == guess) 503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru guess = newGuess; 505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru r = (*pf)((gSortedLines[line])->name, (gSortedLines[guess])->name); 507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (r== 0) 510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (r < 0) 512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru hi = guess; 513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru lo = guess; 515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elapsedTime = timeGetTime() - startTime; 519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_icu) 524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned long startTime = timeGetTime(); 526b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCollationResult r = UCOL_EQUAL; 527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (loops=0; loops<adj_loopCount; loops++) { 528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (line=0; line < gNumFileLines; line++) { 530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int lineLen = -1; 531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int guessLen = -1; 532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_uselen) { 533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru lineLen = (gSortedLines[line])->len; 534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int hi = gNumFileLines-1; 536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int lo = 0; 537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int guess = -1; 538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int newGuess = (hi + lo) / 2; 540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (newGuess == guess) 541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru guess = newGuess; 543b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int ri = 0; 544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_usekeys) { 545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ri = strcmp((gSortedLines[line])->icuSortKey, (gSortedLines[guess])->icuSortKey); 547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru r=UCOL_GREATER; if(ri<0) {r=UCOL_LESS;} else if (ri==0) {r=UCOL_EQUAL;} 550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_uselen) { 554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru guessLen = (gSortedLines[guess])->len; 555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru r = ucol_strcoll(gCol, (gSortedLines[line])->name, lineLen, (gSortedLines[guess])->name, guessLen); 558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (r== UCOL_EQUAL) 562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (r == UCOL_LESS) 564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru hi = guess; 565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru lo = guess; 567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elapsedTime = timeGetTime() - startTime; 571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_win) 575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned long startTime = timeGetTime(); 577b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int r = 0; 578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (loops=0; loops<adj_loopCount; loops++) { 579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (line=0; line < gNumFileLines; line++) { 581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int lineLen = -1; 582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int guessLen = -1; 583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_uselen) { 584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru lineLen = (gSortedLines[line])->len; 585c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int hi = gNumFileLines-1; 587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int lo = 0; 588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int guess = -1; 589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int newGuess = (hi + lo) / 2; 591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (newGuess == guess) 592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru guess = newGuess; 594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_usekeys) { 595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru r = strcmp((gSortedLines[line])->winSortKey, (gSortedLines[guess])->winSortKey); 597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru r+=2; 600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_uselen) { 604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru guessLen = (gSortedLines[guess])->len; 605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 606c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru r = CompareStringW(gWinLCID, 0, (gSortedLines[line])->name, lineLen, (gSortedLines[guess])->name, guessLen); 608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (r == 0) { 610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_terse == FALSE) { 611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "Error returned from Windows CompareStringW.\n"); 612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru exit(-1); 614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (r== 2) // strings == 618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (r == 1) // line < guess 620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru hi = guess; 621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else // line > guess 622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru lo = guess; 623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 625c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 626c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elapsedTime = timeGetTime() - startTime; 627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 629c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_unix) 631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned long startTime = timeGetTime(); 633b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int r = 0; 634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (loops=0; loops<adj_loopCount; loops++) { 635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (line=0; line < gNumFileLines; line++) { 637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int hi = gNumFileLines-1; 638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int lo = 0; 639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int guess = -1; 640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int newGuess = (hi + lo) / 2; 642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (newGuess == guess) 643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru guess = newGuess; 645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_usekeys) { 646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru r = strcmp((gSortedLines[line])->unixSortKey, (gSortedLines[guess])->unixSortKey); 648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru r = strcoll((gSortedLines[line])->unixName, (gSortedLines[guess])->unixName); 655c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errno = 0; 657c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (errno != 0) { 658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "Error %d returned from strcoll.\n", errno); 659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru exit(-1); 660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (r == 0) // strings == 664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (r < 0) // line < guess 666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru hi = guess; 667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else // line > guess 668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru lo = guess; 669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elapsedTime = timeGetTime() - startTime; 673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 678c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); 679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_terse == FALSE) { 680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("binary search: total # of string compares = %d\n", gCount); 681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("binary search: compares per loop = %d\n", gCount / loops); 682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("binary search: time per compare = %d ns\n", ns); 683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("%d, ", ns); 685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// doQSort() The quick sort timing test. Uses the C library qsort function. 695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid doQSort() { 698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int i; 699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Line **sortBuf = new Line *[gNumFileLines]; 700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Adjust loop count to compensate for file size. QSort should be n log(n) 702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru double dLoopCount = double(opt_loopCount) * 3000. / (log10(gNumFileLines) * double(gNumFileLines)); 703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_usekeys) dLoopCount *= 5; 704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int adj_loopCount = int(dLoopCount); 705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (adj_loopCount < 1) adj_loopCount = 1; 706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount = 0; 709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned long startTime = timeGetTime(); 710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_win && opt_usekeys) { 711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<opt_loopCount; i++) { 712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); 713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru qsort(sortBuf, gNumFileLines, sizeof(Line *), WinstrcmpK); 714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (opt_win && opt_uselen) { 718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<adj_loopCount; i++) { 719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); 720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru qsort(sortBuf, gNumFileLines, sizeof(Line *), WinstrcmpL); 721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (opt_win && !opt_uselen) { 726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<adj_loopCount; i++) { 727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); 728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru qsort(sortBuf, gNumFileLines, sizeof(Line *), Winstrcmp); 729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (opt_icu && opt_usekeys) { 733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<adj_loopCount; i++) { 734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); 735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru qsort(sortBuf, gNumFileLines, sizeof(Line *), ICUstrcmpK); 736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (opt_icu && opt_uselen) { 740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<adj_loopCount; i++) { 741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); 742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru qsort(sortBuf, gNumFileLines, sizeof(Line *), ICUstrcmpL); 743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (opt_icu && !opt_uselen) { 748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<adj_loopCount; i++) { 749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); 750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru qsort(sortBuf, gNumFileLines, sizeof(Line *), ICUstrcmp); 751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (opt_unix && !opt_usekeys) { 755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<adj_loopCount; i++) { 756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *)); 757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru qsort(sortBuf, gNumFileLines, sizeof(Line *), UNIXstrcmp); 758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned long elapsedTime = timeGetTime() - startTime; 762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); 763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_terse == FALSE) { 764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("qsort: total # of string compares = %d\n", gCount); 765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("qsort: time per compare = %d ns\n", ns); 766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("%d, ", ns); 768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 769b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// doKeyHist() Output a table of data for 776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// average sort key size vs. string length. 777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid doKeyHist() { 780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int i; 781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int maxLen = 0; 782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Find the maximum string length 784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<gNumFileLines; i++) { 785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (gFileLines[i].len > maxLen) maxLen = gFileLines[i].len; 786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Allocate arrays to hold the histogram data 789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int *accumulatedLen = new int[maxLen+1]; 790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int *numKeysOfSize = new int[maxLen+1]; 791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<=maxLen; i++) { 792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru accumulatedLen[i] = 0; 793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numKeysOfSize[i] = 0; 794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Fill the arrays... 797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<gNumFileLines; i++) { 798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int len = gFileLines[i].len; 799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru accumulatedLen[len] += strlen(gFileLines[i].icuSortKey); 800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru numKeysOfSize[len] += 1; 801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // And write out averages 804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("String Length, Avg Key Length, Avg Key Len per char\n"); 805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=1; i<=maxLen; i++) { 806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (numKeysOfSize[i] > 0) { 807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("%d, %f, %f\n", i, (float)accumulatedLen[i] / (float)numKeysOfSize[i], 808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (float)accumulatedLen[i] / (float)(numKeysOfSize[i] * i)); 809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 81150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete []accumulatedLen; 81250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete []numKeysOfSize ; 813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// doForwardIterTest(UBool) Forward iteration test 818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// argument null-terminated string used 819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid doForwardIterTest(UBool haslen) { 822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int count = 0; 823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode error = U_ZERO_ERROR; 825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("\n\nPerforming forward iteration performance test with "); 826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (haslen) { 828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("non-null terminated data -----------\n"); 829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("null terminated data -----------\n"); 832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("performance test on strings from file -----------\n"); 834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar dummytext[] = {0, 0}; 836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollationElements *iter = ucol_openElements(gCol, NULL, 0, &error); 837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setText(iter, dummytext, 1, &error); 838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount = 0; 840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned long startTime = timeGetTime(); 841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count < opt_loopCount) { 842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int linecount = 0; 843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (linecount < gNumFileLines) { 844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *str = gFileLines[linecount].name; 845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int strlen = haslen?gFileLines[linecount].len:-1; 846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setText(iter, str, strlen, &error); 847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (ucol_next(iter, &error) != UCOL_NULLORDER) { 848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 850c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru linecount ++; 852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count ++; 854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned long elapsedTime = timeGetTime() - startTime; 856b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru printf("elapsedTime %ld\n", elapsedTime); 857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // empty loop recalculation 859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count = 0; 860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru startTime = timeGetTime(); 861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count < opt_loopCount) { 862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int linecount = 0; 863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (linecount < gNumFileLines) { 864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *str = gFileLines[linecount].name; 865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int strlen = haslen?gFileLines[linecount].len:-1; 866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setText(iter, str, strlen, &error); 867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru linecount ++; 868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count ++; 870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elapsedTime -= (timeGetTime() - startTime); 872b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru printf("elapsedTime %ld\n", elapsedTime); 873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_closeElements(iter); 875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); 877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("Total number of strings compared %d in %d loops\n", gNumFileLines, 878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru opt_loopCount); 879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("Average time per ucol_next() nano seconds %d\n", ns); 880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("performance test on skipped-5 concatenated strings from file -----------\n"); 882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *str; 884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int strlen = 0; 885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // appending all the strings 886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int linecount = 0; 887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (linecount < gNumFileLines) { 888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strlen += haslen?gFileLines[linecount].len: 889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru u_strlen(gFileLines[linecount].name); 890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru linecount ++; 891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru str = (UChar *)malloc(sizeof(UChar) * strlen); 893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int strindex = 0; 894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru linecount = 0; 895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (strindex < strlen) { 896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int len = 0; 897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru len += haslen?gFileLines[linecount].len: 898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru u_strlen(gFileLines[linecount].name); 899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memcpy(str + strindex, gFileLines[linecount].name, 900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sizeof(UChar) * len); 901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strindex += len; 902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru linecount ++; 903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("Total size of strings %d\n", strlen); 906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount = 0; 908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count = 0; 909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!haslen) { 911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strlen = -1; 912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iter = ucol_openElements(gCol, str, strlen, &error); 914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!haslen) { 915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strlen = u_strlen(str); 916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strlen -= 5; // any left over characters are not iterated, 918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // this is to ensure the backwards and forwards iterators 919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // gets the same position 920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru startTime = timeGetTime(); 921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count < opt_loopCount) { 922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int count5 = 5; 923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strindex = 0; 924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setOffset(iter, strindex, &error); 925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (TRUE) { 926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ucol_next(iter, &error) == UCOL_NULLORDER) { 927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount++; 930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count5 --; 931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count5 == 0) { 932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strindex += 10; 933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strindex > strlen) { 934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setOffset(iter, strindex, &error); 937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count5 = 5; 938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count ++; 941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 942c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elapsedTime = timeGetTime() - startTime; 944b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru printf("elapsedTime %ld\n", elapsedTime); 945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // empty loop recalculation 947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int tempgCount = 0; 948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count = 0; 949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru startTime = timeGetTime(); 950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count < opt_loopCount) { 951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int count5 = 5; 952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strindex = 0; 953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setOffset(iter, strindex, &error); 954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (TRUE) { 955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tempgCount ++; 956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count5 --; 957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count5 == 0) { 958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strindex += 10; 959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strindex > strlen) { 960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setOffset(iter, strindex, &error); 963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count5 = 5; 964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count ++; 967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elapsedTime -= (timeGetTime() - startTime); 969b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru printf("elapsedTime %ld\n", elapsedTime); 970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_closeElements(iter); 972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("gCount %d\n", gCount); 974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); 975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("Average time per ucol_next() nano seconds %d\n", ns); 976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// doBackwardIterTest(UBool) Backwards iteration test 981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// argument null-terminated string used 982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid doBackwardIterTest(UBool haslen) { 985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int count = 0; 986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode error = U_ZERO_ERROR; 987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("\n\nPerforming backward iteration performance test with "); 988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (haslen) { 990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("non-null terminated data -----------\n"); 991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("null terminated data -----------\n"); 994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("performance test on strings from file -----------\n"); 997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollationElements *iter = ucol_openElements(gCol, NULL, 0, &error); 999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar dummytext[] = {0, 0}; 1000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setText(iter, dummytext, 1, &error); 1001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount = 0; 1003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned long startTime = timeGetTime(); 1004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count < opt_loopCount) { 1005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int linecount = 0; 1006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (linecount < gNumFileLines) { 1007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *str = gFileLines[linecount].name; 1008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int strlen = haslen?gFileLines[linecount].len:-1; 1009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setText(iter, str, strlen, &error); 1010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (ucol_previous(iter, &error) != UCOL_NULLORDER) { 1011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount ++; 1012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru linecount ++; 1015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count ++; 1017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned long elapsedTime = timeGetTime() - startTime; 1019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1020b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru printf("elapsedTime %ld\n", elapsedTime); 1021c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // empty loop recalculation 1023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count = 0; 1024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru startTime = timeGetTime(); 1025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count < opt_loopCount) { 1026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int linecount = 0; 1027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (linecount < gNumFileLines) { 1028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *str = gFileLines[linecount].name; 1029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int strlen = haslen?gFileLines[linecount].len:-1; 1030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setText(iter, str, strlen, &error); 1031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru linecount ++; 1032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count ++; 1034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elapsedTime -= (timeGetTime() - startTime); 1036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1037b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru printf("elapsedTime %ld\n", elapsedTime); 1038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_closeElements(iter); 1039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); 1041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("Total number of strings compared %d in %d loops\n", gNumFileLines, 1042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru opt_loopCount); 1043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("Average time per ucol_previous() nano seconds %d\n", ns); 1044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("performance test on skipped-5 concatenated strings from file -----------\n"); 1046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *str; 1048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int strlen = 0; 1049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // appending all the strings 1050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int linecount = 0; 1051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (linecount < gNumFileLines) { 1052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strlen += haslen?gFileLines[linecount].len: 1053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru u_strlen(gFileLines[linecount].name); 1054c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru linecount ++; 1055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1056c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru str = (UChar *)malloc(sizeof(UChar) * strlen); 1057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int strindex = 0; 1058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru linecount = 0; 1059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (strindex < strlen) { 1060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int len = 0; 1061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru len += haslen?gFileLines[linecount].len: 1062c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru u_strlen(gFileLines[linecount].name); 1063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memcpy(str + strindex, gFileLines[linecount].name, 1064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sizeof(UChar) * len); 1065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strindex += len; 1066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru linecount ++; 1067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("Total size of strings %d\n", strlen); 1070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount = 0; 1072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count = 0; 1073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!haslen) { 1075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strlen = -1; 1076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1078c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iter = ucol_openElements(gCol, str, strlen, &error); 1079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!haslen) { 1080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strlen = u_strlen(str); 1081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru startTime = timeGetTime(); 1084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count < opt_loopCount) { 1085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int count5 = 5; 1086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strindex = 5; 1087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setOffset(iter, strindex, &error); 1088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (TRUE) { 1089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ucol_previous(iter, &error) == UCOL_NULLORDER) { 1090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCount ++; 1093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count5 --; 1094c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count5 == 0) { 1095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strindex += 10; 1096c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strindex > strlen) { 1097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setOffset(iter, strindex, &error); 1100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count5 = 5; 1101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count ++; 1104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elapsedTime = timeGetTime() - startTime; 1107b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru printf("elapsedTime %ld\n", elapsedTime); 1108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // empty loop recalculation 1110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count = 0; 1111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int tempgCount = 0; 1112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru startTime = timeGetTime(); 1113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (count < opt_loopCount) { 1114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int count5 = 5; 1115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strindex = 5; 1116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setOffset(iter, strindex, &error); 1117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (TRUE) { 1118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tempgCount ++; 1119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count5 --; 1120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (count5 == 0) { 1121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strindex += 10; 1122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strindex > strlen) { 1123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setOffset(iter, strindex, &error); 1126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count5 = 5; 1127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru count ++; 1130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru elapsedTime -= (timeGetTime() - startTime); 1132b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru printf("elapsedTime %ld\n", elapsedTime); 1133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_closeElements(iter); 1134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("gCount %d\n", gCount); 1136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); 1137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("Average time per ucol_previous() nano seconds %d\n", ns); 1138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 1141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// doIterTest() Iteration test 1143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 1145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid doIterTest() { 1146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru doForwardIterTest(opt_uselen); 1147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru doBackwardIterTest(opt_uselen); 1148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 1152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// UnixConvert -- Convert the lines of the file to the encoding for UNIX 1154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Since it appears that Unicode support is going in the general 1155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// direction of the use of UTF-8 locales, that is the approach 1156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// that is used here. 1157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 1159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnixConvert() { 1160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int line; 1161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UConverter *cvrtr; // An ICU code page converter. 1163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cvrtr = ucnv_open("utf-8", &status); // we are just doing UTF-8 locales for now. 1167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1168b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fprintf(stderr, "ICU Converter open failed.: %s\n", u_errorName(status)); 1169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru exit(-1); 1170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (line=0; line < gNumFileLines; line++) { 1173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int sizeNeeded = ucnv_fromUChars(cvrtr, 1174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 0, // ptr to target buffer. 1175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 0, // length of target buffer. 1176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gFileLines[line].name, 1177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru -1, // source is null terminated 1178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru &status); 1179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (status != U_BUFFER_OVERFLOW_ERROR && status != U_ZERO_ERROR) { 1180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //fprintf(stderr, "Conversion from Unicode, something is wrong.\n"); 1181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //exit(-1); 1182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 1184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gFileLines[line].unixName = new char[sizeNeeded+1]; 1185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sizeNeeded = ucnv_fromUChars(cvrtr, 1186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gFileLines[line].unixName, // ptr to target buffer. 1187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sizeNeeded+1, // length of target buffer. 1188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gFileLines[line].name, 1189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru -1, // source is null terminated 1190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru &status); 1191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "ICU Conversion Failed.: %d\n", status); 1193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru exit(-1); 1194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gFileLines[line].unixName[sizeNeeded] = 0; 1196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }; 1197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucnv_close(cvrtr); 1198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 1202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// class UCharFile Class to hide all the gorp to read a file in 1204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// and produce a stream of UChars. 1205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 1207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruclass UCharFile { 1208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querupublic: 1209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharFile(const char *fileName); 1210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ~UCharFile(); 1211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar get(); 1212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool eof() {return fEof;}; 1213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool error() {return fError;}; 1214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruprivate: 1216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCharFile (const UCharFile & /*other*/) {}; // No copy constructor. 1217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCharFile & operator = (const UCharFile &/*other*/) {return *this;}; // No assignment op 1218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru FILE *fFile; 1220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const char *fName; 1221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool fEof; 1222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool fError; 1223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar fPending2ndSurrogate; 1224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru enum {UTF16LE, UTF16BE, UTF8} fEncoding; 1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}; 1227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUCharFile::UCharFile(const char * fileName) { 1229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fEof = FALSE; 1230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fError = FALSE; 1231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fName = fileName; 1232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFile = fopen(fName, "rb"); 1233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPending2ndSurrogate = 0; 1234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fFile == NULL) { 1235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "Can not open file \"%s\"\n", opt_fName); 1236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fError = TRUE; 1237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 1238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Look for the byte order mark at the start of the file. 1241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int BOMC1, BOMC2, BOMC3; 1243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru BOMC1 = fgetc(fFile); 1244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru BOMC2 = fgetc(fFile); 1245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (BOMC1 == 0xff && BOMC2 == 0xfe) { 1247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fEncoding = UTF16LE; } 1248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (BOMC1 == 0xfe && BOMC2 == 0xff) { 1249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fEncoding = UTF16BE; } 1250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) { 1251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fEncoding = UTF8; } 1252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 1253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "collperf: file \"%s\" encoding must be UTF-8 or UTF-16, and " 1255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "must include a BOM.\n", fileName); 1256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fError = true; 1257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 1258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUCharFile::~UCharFile() { 1263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fclose(fFile); 1264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUChar UCharFile::get() { 1269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar c; 1270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru switch (fEncoding) { 1271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case UTF16LE: 1272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int cL, cH; 1274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cL = fgetc(fFile); 1275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cH = fgetc(fFile); 1276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c = cL | (cH << 8); 1277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (cH == EOF) { 1278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c = 0; 1279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fEof = TRUE; 1280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case UTF16BE: 1284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int cL, cH; 1286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cH = fgetc(fFile); 1287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru cL = fgetc(fFile); 1288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c = cL | (cH << 8); 1289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (cL == EOF) { 1290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c = 0; 1291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fEof = TRUE; 1292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case UTF8: 1296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fPending2ndSurrogate != 0) { 1298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c = fPending2ndSurrogate; 1299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPending2ndSurrogate = 0; 1300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int ch = fgetc(fFile); // Note: c and ch are separate cause eof test doesn't work on UChar type. 1304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ch == EOF) { 1305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c = 0; 1306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fEof = TRUE; 1307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ch <= 0x7f) { 1311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // It's ascii. No further utf-8 conversion. 1312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c = ch; 1313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Figure out the lenght of the char and read the rest of the bytes 1317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // into a temp array. 1318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int nBytes; 1319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ch >= 0xF0) {nBytes=4;} 1320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (ch >= 0xE0) {nBytes=3;} 1321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (ch >= 0xC0) {nBytes=2;} 1322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 1323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "utf-8 encoded file contains corrupt data.\n"); 1324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fError = TRUE; 1325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 1326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned char bytes[10]; 1329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru bytes[0] = (unsigned char)ch; 1330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int i; 1331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=1; i<nBytes; i++) { 1332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru bytes[i] = fgetc(fFile); 1333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (bytes[i] < 0x80 || bytes[i] >= 0xc0) { 1334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "utf-8 encoded file contains corrupt data.\n"); 1335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fError = TRUE; 1336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 1337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1338c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Convert the bytes from the temp array to a Unicode char. 1341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru i = 0; 1342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t cp; 1343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTF8_NEXT_CHAR_UNSAFE(bytes, i, cp); 1344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c = (UChar)cp; 1345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (cp >= 0x10000) { 1347c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The code point needs to be broken up into a utf-16 surrogate pair. 1348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Process first half this time through the main loop, and 1349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // remember the other half for the next time through. 1350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar utf16Buf[3]; 1351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru i = 0; 1352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp); 1353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPending2ndSurrogate = utf16Buf[1]; 1354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c = utf16Buf[0]; 1355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }; 1358b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru default: 1359b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru c = 0xFFFD; /* Error, unspecified codepage*/ 1360b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fprintf(stderr, "UCharFile: Error: unknown fEncoding\n"); 1361b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru exit(1); 1362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return c; 1364c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 1367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// openRulesCollator - Command line specified a rules file. Read it in 1369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// and open a collator with it. 1370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 1372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUCollator *openRulesCollator() { 1373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharFile f(opt_rules); 1374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (f.error()) { 1375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 1376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int bufLen = 10000; 1379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *buf = (UChar *)malloc(bufLen * sizeof(UChar)); 1380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int i = 0; 1381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(;;) { 1383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru buf[i] = f.get(); 1384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (f.eof()) { 1385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (f.error()) { 1388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 1389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru i++; 1391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (i >= bufLen) { 1392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru bufLen += 10000; 1393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru buf = (UChar *)realloc(buf, bufLen); 1394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru buf[i] = 0; 1397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollator *coll = ucol_openRules(buf, u_strlen(buf), UCOL_OFF, 1400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCOL_DEFAULT_STRENGTH, NULL, &status); 1401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "ICU ucol_openRules() open failed.: %d\n", status); 1403c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 1404c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru free(buf); 1406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return coll; 1407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1412c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1413c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 1414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1415c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Main -- process command line, read in and pre-process the test file, 1416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// call other functions to do the actual tests. 1417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 1419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint main(int argc, const char** argv) { 1420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ProcessOptions(argc, argv, opts) != TRUE || opt_help || opt_fName == 0) { 1421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf(gUsageString); 1422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru exit (1); 1423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Make sure that we've only got one API selected. 1426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_unix || opt_win) opt_icu = FALSE; 1427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_unix) opt_win = FALSE; 1428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Set up an ICU collator 1431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1433c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_rules != 0) { 1435c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCol = openRulesCollator(); 1436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (gCol == 0) {return -1;} 1437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 1439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gCol = ucol_open(opt_locale, &status); 1440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "Collator creation failed.: %d\n", status); 1442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) { 1446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale); 1447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) { 1449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale); 1450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1451c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1452c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_norm) { 1453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(gCol, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 1454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_french && opt_frenchoff) { 1456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "collperf: Error, specified both -french and -frenchoff options."); 1457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru exit(-1); 1458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_french) { 1460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_ON, &status); 1461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_frenchoff) { 1463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_OFF, &status); 1464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_lower) { 1466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status); 1467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1468c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_upper) { 1469c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status); 1470c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1471c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_case) { 1472c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(gCol, UCOL_CASE_LEVEL, UCOL_ON, &status); 1473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1474c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_shifted) { 1475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(gCol, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 1476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_level != 0) { 1478c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru switch (opt_level) { 1479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case 1: 1480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_PRIMARY, &status); 1481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case 2: 1483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_SECONDARY, &status); 1484c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case 3: 1486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_TERTIARY, &status); 1487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case 4: 1489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_QUATERNARY, &status); 1490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case 5: 1492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_IDENTICAL, &status); 1493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru default: 1495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "-level param must be between 1 and 5\n"); 1496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru exit(-1); 1497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "Collator attribute setting failed.: %d\n", status); 1502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Set up a Windows LCID 1508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_langid != 0) { 1510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT); 1511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 1513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gWinLCID = uloc_getLCID(opt_locale); 1514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Set the UNIX locale 1519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_unix) { 1521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (setlocale(LC_ALL, opt_locale) == 0) { 1522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale); 1523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru exit(-1); 1524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Read in the input file. 1528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // File assumed to be utf-16. 1529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Lines go onto heap buffers. Global index array to line starts is created. 1530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Lines themselves are null terminated. 1531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCharFile f(opt_fName); 1534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (f.error()) { 1535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru exit(-1); 1536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const int MAXLINES = 100000; 1539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gFileLines = new Line[MAXLINES]; 1540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar buf[1024]; 1541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int column = 0; 1542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Read the file, split into lines, and save in memory. 1544c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Loop runs once per utf-16 value from the input file, 1545c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // (The number of bytes read from file per loop iteration depends on external encoding.) 1546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 1547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar c = f.get(); 1549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (f.error()){ 1550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru exit(-1); 1551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We now have a good UTF-16 value in c. 1555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Watch for CR, LF, EOF; these finish off a line. 1557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c == 0xd) { 1558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 1559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (f.eof() || c == 0x0a || c==0x2028) { // Unipad inserts 2028 line separators! 1562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru buf[column++] = 0; 1563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (column > 1) { 1564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gFileLines[gNumFileLines].name = new UChar[column]; 1565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gFileLines[gNumFileLines].len = column-1; 1566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memcpy(gFileLines[gNumFileLines].name, buf, column * sizeof(UChar)); 1567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gNumFileLines++; 1568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru column = 0; 1569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (gNumFileLines >= MAXLINES) { 1570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "File too big. Max number of lines is %d\n", MAXLINES); 1571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru exit(-1); 1572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c == 0xa || c == 0x2028) 1576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 1577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 1578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; // EOF 1579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru buf[column++] = c; 1581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (column >= 1023) 1582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static UBool warnFlag = TRUE; 1584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (warnFlag) { 1585c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "Warning - file line longer than 1023 chars truncated.\n"); 1586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru warnFlag = FALSE; 1587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru column--; 1589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_terse == FALSE) { 1593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("file \"%s\", %d lines.\n", opt_fName, gNumFileLines); 1594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Convert the lines to the UNIX encoding. 1598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_unix) { 1599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnixConvert(); 1600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Pre-compute ICU sort keys for the lines of the file. 1604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1605c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int line; 1606b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t t; 1607c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1608c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (line=0; line<gNumFileLines; line++) { 1609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t = ucol_getSortKey(gCol, gFileLines[line].name, -1, (unsigned char *)buf, sizeof(buf)); 1610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gFileLines[line].icuSortKey = new char[t]; 1611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1612b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (t > (int32_t)sizeof(buf)) { 1613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t = ucol_getSortKey(gCol, gFileLines[line].name, -1, (unsigned char *)gFileLines[line].icuSortKey , t); 1614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 1616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1617c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memcpy(gFileLines[line].icuSortKey, buf, t); 1618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Pre-compute Windows sort keys for the lines of the file. 1625c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1626c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (line=0; line<gNumFileLines; line++) { 1627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t=LCMapStringW(gWinLCID, LCMAP_SORTKEY, gFileLines[line].name, -1, buf, sizeof(buf)); 1628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gFileLines[line].winSortKey = new char[t]; 1629b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (t > (int32_t)sizeof(buf)) { 1630c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t = LCMapStringW(gWinLCID, LCMAP_SORTKEY, gFileLines[line].name, -1, (unsigned short *)(gFileLines[line].winSortKey), t); 1631c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1632c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 1633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1634c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memcpy(gFileLines[line].winSortKey, buf, t); 1635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Pre-compute UNIX sort keys for the lines of the file. 1640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_unix) { 1642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (line=0; line<gNumFileLines; line++) { 1643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t=strxfrm((char *)buf, gFileLines[line].unixName, sizeof(buf)); 1644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gFileLines[line].unixSortKey = new char[t]; 1645b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (t > (int32_t)sizeof(buf)) { 1646c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t = strxfrm(gFileLines[line].unixSortKey, gFileLines[line].unixName, sizeof(buf)); 1647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else 1649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memcpy(gFileLines[line].unixSortKey, buf, t); 1651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1655c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1657c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Dump file lines, CEs, Sort Keys if requested. 1658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_dump) { 1660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int i; 1661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (line=0; line<gNumFileLines; line++) { 1662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0;;i++) { 1663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar c = gFileLines[line].name[i]; 1664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c == 0) 1665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c < 0x20 || c > 0x7e) { 1667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("\\u%.4x", c); 1668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 1670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("%c", c); 1671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("\n"); 1674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf(" CEs: "); 1676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollationElements *CEiter = ucol_openElements(gCol, gFileLines[line].name, -1, &status); 1677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t ce; 1678c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru i = 0; 1679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 1680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ce = ucol_next(CEiter, &status); 1681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ce == UCOL_NULLORDER) { 1682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf(" %.8x", ce); 1685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (++i > 8) { 1686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("\n "); 1687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru i = 0; 1688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("\n"); 1691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_closeElements(CEiter); 1692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf(" ICU Sort Key: "); 1695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; ; i++) { 1696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru unsigned char c = gFileLines[line].icuSortKey[i]; 1697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("%02x ", c); 1698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c == 0) { 1699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (i > 0 && i % 20 == 0) { 1702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("\n "); 1703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("\n"); 1706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Pre-sort the lines. 1712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int i; 1714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gSortedLines = new Line *[gNumFileLines]; 1715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<gNumFileLines; i++) { 1716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gSortedLines[i] = &gFileLines[i]; 1717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_win) { 1720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru qsort(gSortedLines, gNumFileLines, sizeof(Line *), Winstrcmp); 1721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (opt_unix) { 1723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru qsort(gSortedLines, gNumFileLines, sizeof(Line *), UNIXstrcmp); 1724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else /* ICU */ 1726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru qsort(gSortedLines, gNumFileLines, sizeof(Line *), ICUstrcmp); 1728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Make up a randomized order, will be used for sorting tests. 1733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gRandomLines = new Line *[gNumFileLines]; 1735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<gNumFileLines; i++) { 1736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gRandomLines[i] = &gFileLines[i]; 1737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru qsort(gRandomLines, gNumFileLines, sizeof(Line *), ICURandomCmp); 1739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We've got the file read into memory. Go do something with it. 1745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_qsort) doQSort(); 1748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_binsearch) doBinarySearch(); 1749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_keygen) doKeyGen(); 1750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_keyhist) doKeyHist(); 1751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (opt_itertest) doIterTest(); 1752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 1754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1756