1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/********************************************************************
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * COPYRIGHT:
354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * Copyright (C) 2001-2012 IBM, Inc.   All Rights Reserved.
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ********************************************************************/
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/********************************************************************************
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* File ubrkperf.cpp
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Modification History:
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*        Name                     Description
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*     Vladimir Weinstein          First Version, based on collperf
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*********************************************************************************
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  This program tests break iterator performance
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//      Currently we test only ICU APIs with the future possibility of testing *nix & win32 APIs
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//      (if any)
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//      A text file is required as input.  It must be in utf-8 or utf-16 format,
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//      and include a byte order mark.  Either LE or BE format is OK.
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// Help text describing the command line switches of ubrkperf.
// NOTE(review): "-iloop" is described below but has no entry in the opts[]
//               table, while "-time", "-passes" and "-?" are accepted by that
//               table but not described here -- confirm which side is stale.
const char gUsageString[] =
    "usage:  ubrkperf options...\n"
    "-help                      Display this message.\n"
    "-file file_name            utf-16/utf-8 format file.\n"
    "-locale name               ICU locale to use.  Default is en_US\n"
    "-langid 0x1234             Windows Language ID number.  Default to value for -locale option\n"
    "                              see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
    "-win                       Run test using Windows native services. (currently not working) (ICU is default)\n"
    "-unix                      Run test using Unix word breaking services. (currently not working) \n"
    "-mac                       Run test using MacOSX word breaking services.\n"
    "-uselen                    Use API with string lengths.  Default is null-terminated strings\n"
    "-char                      Use character break iterator\n"
    "-word                      Use word break iterator\n"
    "-line                      Use line break iterator\n"
    "-sentence                  Use sentence break iterator\n"
    "-loop nnnn                 Loopcount for test.  Adjust for reasonable total running time.\n"
    "-iloop n                   Inner Loop Count.  Default = 1.  Number of calls to function\n"
    "                               under test at each call point.  For measuring test overhead.\n"
    "-terse                     Terse numbers-only output.  Intended for use by scripts.\n"
    "-dump                      Display stuff.\n"
    "-capi                      Use C APIs instead of C++ APIs (currently not working)\n"
    "-next                      Do the next test\n"
    "-isBound                   Do the isBound test\n";
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h>
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <string.h>
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdlib.h>
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <math.h>
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <locale.h>
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <errno.h>
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <sys/stat.h>
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/utypes.h>
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ucol.h>
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ucoleitr.h>
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/uloc.h>
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ustring.h>
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ures.h>
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/uchar.h>
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ucnv.h>
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/utf8.h>
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/brkiter.h>
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
72103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#if U_PLATFORM_HAS_WIN32_API
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <windows.h>
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  Stubs for Windows API functions when building on UNIXes.
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <sys/time.h>
// Stand-in for the Win32 timeGetTime(): returns the wall-clock time in
// milliseconds, as reported by gettimeofday().
//
// The result is allowed to wrap around the range of unsigned long.  The
// seconds field is converted to unsigned long BEFORE the multiplication so
// that any wraparound happens in unsigned arithmetic (well defined) rather
// than in the signed time_t type (undefined behaviour on overflow).
unsigned long timeGetTime() {
    struct timeval now;
    gettimeofday(&now, NULL);
    unsigned long millis = (unsigned long)now.tv_sec * 1000UL;
    millis += (unsigned long)(now.tv_usec / 1000);
    return millis;
}
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define MAKELCID(a,b) 0
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  Command line option variables
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//     These global variables are set according to the options specified
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//     on the command line by the user.
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruchar * opt_fName      = 0;
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruchar * opt_locale     = "en_US";
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint    opt_langid     = 0;         // Defaults to value corresponding to opt_locale.
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruchar * opt_rules      = 0;
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_help       = FALSE;
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint    opt_time       = 0;
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint    opt_loopCount  = 0;
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint    opt_passesCount= 1;
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_terse      = FALSE;
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_icu        = TRUE;
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_win        = FALSE;      // Run with Windows native functions.
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_unix       = FALSE;      // Run with UNIX strcoll, strxfrm functions.
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_mac        = FALSE;      // Run with MacOSX word break services.
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_uselen     = FALSE;
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_dump       = FALSE;
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_char       = FALSE;
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_word       = FALSE;
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_line       = FALSE;
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_sentence   = FALSE;
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_capi       = FALSE;
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_next       = FALSE;
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool  opt_isBound    = FALSE;
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//   Definitions for the command line options
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
// One row of the command line option table.
struct OptSpec {
    // Switch text exactly as typed on the command line, e.g. "-file".
    // A null name terminates the table.
    const char *name;

    // Kind of option:
    //   FLAG   - takes no argument; the target UBool is set to TRUE.
    //   NUM    - followed by an integer argument stored into an int.
    //   STRING - followed by a string argument whose pointer is stored.
    enum {
        FLAG,
        NUM,
        STRING
    } type;

    // Address of the variable that receives the option's value.
    void *pVar;
};
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruOptSpec opts[] = {
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-file",        OptSpec::STRING, &opt_fName},
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-locale",      OptSpec::STRING, &opt_locale},
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-langid",      OptSpec::NUM,    &opt_langid},
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-win",         OptSpec::FLAG,   &opt_win},
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-unix",        OptSpec::FLAG,   &opt_unix},
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-mac",         OptSpec::FLAG,   &opt_mac},
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-uselen",      OptSpec::FLAG,   &opt_uselen},
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-loop",        OptSpec::NUM,    &opt_loopCount},
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-time",        OptSpec::NUM,    &opt_time},
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-passes",      OptSpec::NUM,    &opt_passesCount},
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-char",        OptSpec::FLAG,   &opt_char},
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-word",        OptSpec::FLAG,   &opt_word},
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-line",        OptSpec::FLAG,   &opt_line},
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-sentence",    OptSpec::FLAG,   &opt_sentence},
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-terse",       OptSpec::FLAG,   &opt_terse},
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-dump",        OptSpec::FLAG,   &opt_dump},
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-capi",        OptSpec::FLAG,   &opt_capi},
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-next",        OptSpec::FLAG,   &opt_next},
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-isBound",     OptSpec::FLAG,   &opt_isBound},
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-help",        OptSpec::FLAG,   &opt_help},
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {"-?",           OptSpec::FLAG,   &opt_help},
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {0, OptSpec::FLAG, 0}
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//---------------------------------------------------------------------------
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  Global variables pointing to and describing the test file
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//---------------------------------------------------------------------------
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//DWORD          gWinLCID;
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruBreakIterator *brkit = NULL;
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar *text = NULL;
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t textSize = 0;
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
168103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#if U_PLATFORM_IS_DARWIN_BASED
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <ApplicationServices/ApplicationServices.h>
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum{
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  kUCTextBreakAllMask = (kUCTextBreakClusterMask | kUCTextBreakWordMask | kUCTextBreakLineMask)
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    };
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUCTextBreakType breakTypes[4] = {kUCTextBreakCharMask, kUCTextBreakClusterMask, kUCTextBreakWordMask, kUCTextBreakLineMask};
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTextBreakLocatorRef breakRef;
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUCTextBreakType macBreakType;
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid createMACBrkIt() {
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  OSStatus status = noErr;
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  LocaleRef lref;
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  status = LocaleRefFromLocaleString(opt_locale, &lref);
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  status = UCCreateTextBreakLocator(lref, 0, kUCTextBreakAllMask, (TextBreakLocatorRef*)&breakRef);
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if(opt_char == TRUE) {
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    macBreakType = kUCTextBreakClusterMask;
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else if(opt_word == TRUE) {
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    macBreakType = kUCTextBreakWordMask;
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else if(opt_line == TRUE) {
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    macBreakType = kUCTextBreakLineMask;
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else if(opt_sentence == TRUE) {
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // error
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // brkit = BreakIterator::createSentenceInstance(opt_locale, status);
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else {
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // default is character iterator
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    macBreakType = kUCTextBreakClusterMask;
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      }
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid createICUBrkIt() {
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  //
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  //  Set up an ICU break iterator
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  //
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  UErrorCode          status = U_ZERO_ERROR;
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if(opt_char == TRUE) {
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    brkit = BreakIterator::createCharacterInstance(opt_locale, status);
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else if(opt_word == TRUE) {
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    brkit = BreakIterator::createWordInstance(opt_locale, status);
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else if(opt_line == TRUE) {
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    brkit = BreakIterator::createLineInstance(opt_locale, status);
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else if(opt_sentence == TRUE) {
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    brkit = BreakIterator::createSentenceInstance(opt_locale, status);
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else {
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // default is character iterator
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    brkit = BreakIterator::createCharacterInstance(opt_locale, status);
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) {
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale);
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) {
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale);
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//---------------------------------------------------------------------------
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  ProcessOptions()    Function to read the command line options.
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//---------------------------------------------------------------------------
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool ProcessOptions(int argc, const char **argv, OptSpec opts[])
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int         i;
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int         argNum;
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char  *pArgName;
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    OptSpec    *pOpt;
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (argNum=1; argNum<argc; argNum++) {
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pArgName = argv[argNum];
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for (pOpt = opts;  pOpt->name != 0; pOpt++) {
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (strcmp(pOpt->name, pArgName) == 0) {
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                switch (pOpt->type) {
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                case OptSpec::FLAG:
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *(UBool *)(pOpt->pVar) = TRUE;
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                case OptSpec::STRING:
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    argNum ++;
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if (argNum >= argc) {
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        return FALSE;
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *(const char **)(pOpt->pVar)  = argv[argNum];
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                case OptSpec::NUM:
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    argNum ++;
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if (argNum >= argc) {
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        return FALSE;
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    char *endp;
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    i = strtol(argv[argNum], &endp, 0);
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if (endp == argv[argNum]) {
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        fprintf(stderr, "integer value expected for \"%s\" option.\n", pOpt->name);
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        return FALSE;
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *(int *)(pOpt->pVar) = i;
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (pOpt->name == 0)
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return FALSE;
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querureturn TRUE;
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid doForwardTest() {
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if (opt_terse == FALSE) {
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("Doing the forward test\n");
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t noBreaks = 0;
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t i = 0;
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  unsigned long startTime = timeGetTime();
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  unsigned long elapsedTime = 0;
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if(opt_icu) {
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    createICUBrkIt();
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    brkit->setText(UnicodeString(text, textSize));
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    brkit->first();
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (opt_terse == FALSE) {
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      printf("Warmup\n");
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int j;
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while((j = brkit->next()) != BreakIterator::DONE) {
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      noBreaks++;
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      //fprintf(stderr, "%d ", j);
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (opt_terse == FALSE) {
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      printf("Measure\n");
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    startTime = timeGetTime();
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i = 0; i < opt_loopCount; i++) {
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      brkit->first();
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      while(brkit->next() != BreakIterator::DONE) {
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      }
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    elapsedTime = timeGetTime()-startTime;
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else if(opt_mac) {
312103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#if U_PLATFORM_IS_DARWIN_BASED
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    createMACBrkIt();
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UniChar* filePtr = text;
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    OSStatus status = noErr;
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UniCharCount startOffset = 0, breakOffset = 0, numUniChars = textSize;
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    startOffset = 0;
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //printf("\t---Search forward--\n");
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (startOffset < numUniChars)
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru	status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars,
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                               startOffset, &breakOffset);
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      //require_action(status == noErr, EXIT, printf( "**UCFindTextBreak failed: startOffset %d, status %d\n", (int)startOffset, (int)status));
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      //require_action((breakOffset <= numUniChars),EXIT, printf("**UCFindTextBreak breakOffset too big: startOffset %d, breakOffset %d\n", (int)startOffset, (int)breakOffset));
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      // Output break
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      //printf("\t%d\n", (int)breakOffset);
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      // Increment counters
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru	noBreaks++;
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      startOffset = breakOffset;
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    startTime = timeGetTime();
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i = 0; i < opt_loopCount; i++) {
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      startOffset = 0;
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      while (startOffset < numUniChars)
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru	{
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru	  status = UCFindTextBreak(breakRef, macBreakType, kUCTextBreakLeadingEdgeMask, filePtr, numUniChars,
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru				   startOffset, &breakOffset);
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru	  // Increment counters
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru	  startOffset = breakOffset;
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru	}
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    elapsedTime = timeGetTime()-startTime;
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCDisposeTextBreakLocator(&breakRef);
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if (opt_terse == FALSE) {
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount));
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize));
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks));
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      printf("forward break iteration average loop time %d\n", loopTime);
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      printf("number of code units %d average time per code unit %d\n", textSize, timePerCU);
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak);
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else {
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid doIsBoundTest() {
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t noBreaks = 0, hit = 0;
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t i = 0, j = 0;
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  unsigned long startTime = timeGetTime();
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  unsigned long elapsedTime = 0;
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  createICUBrkIt();
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  brkit->setText(UnicodeString(text, textSize));
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  brkit->first();
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  for(j = 0; j < textSize; j++) {
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(brkit->isBoundary(j)) {
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      noBreaks++;
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      //fprintf(stderr, "%d ", j);
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  /*
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  while(brkit->next() != BreakIterator::DONE) {
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    noBreaks++;
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  */
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  startTime = timeGetTime();
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  for(i = 0; i < opt_loopCount; i++) {
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(j = 0; j < textSize; j++) {
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      if(brkit->isBoundary(j)) {
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        hit++;
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      }
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  elapsedTime = timeGetTime()-startTime;
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t loopTime = (int)(float(1000) * ((float)elapsedTime/(float)opt_loopCount));
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if (opt_terse == FALSE) {
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      int32_t timePerCU = (int)(float(1000) * ((float)loopTime/(float)textSize));
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      int32_t timePerBreak = (int)(float(1000) * ((float)loopTime/(float)noBreaks));
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      printf("forward break iteration average loop time %d\n", loopTime);
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      printf("number of code units %d average time per code unit %d\n", textSize, timePerCU);
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      printf("number of breaks %d average time per break %d\n", noBreaks, timePerBreak);
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else {
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      printf("time=%d\nevents=%d\nsize=%d\n", elapsedTime, noBreaks, textSize);
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------------------
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//   UnixConvert   -- Convert the lines of the file to the encoding for UNIX
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                    Since it appears that Unicode support is going in the general
414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                    direction of the use of UTF-8 locales, that is the approach
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                    that is used here.
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------------------
void  UnixConvert() {
    // This function currently does nothing: its whole body is compiled out by the
    // "#if 0" below.  The disabled code opens a UTF-8 converter and, for every entry
    // of gFileLines, converts the UTF-16 'name' into a newly allocated, NUL-terminated
    // 'unixName' buffer (one preflight call to size the buffer, one call to convert).
#if 0
    int    line;

    UConverter   *cvrtr;    // An ICU code page converter.
    UErrorCode    status = U_ZERO_ERROR;


    cvrtr = ucnv_open("utf-8", &status);    // we are just doing UTF-8 locales for now.
    if (U_FAILURE(status)) {
        // Print the error code itself, not the address of the status variable.
        fprintf(stderr, "ICU Converter open failed.: %d\n", status);
        exit(-1);
    }
    // redo for unix
    for (line=0; line < gNumFileLines; line++) {
        // Preflight: a NULL target of length 0 makes the call report the needed size.
        int sizeNeeded = ucnv_fromUChars(cvrtr,
                                         0,            // ptr to target buffer.
                                         0,            // length of target buffer.
                                         gFileLines[line].name,
                                         -1,           //  source is null terminated
                                         &status);
        if (status != U_BUFFER_OVERFLOW_ERROR && status != U_ZERO_ERROR) {
            fprintf(stderr, "Conversion from Unicode, something is wrong.\n");
            exit(-1);
        }
        status = U_ZERO_ERROR;
        gFileLines[line].unixName = new char[sizeNeeded+1];
        sizeNeeded = ucnv_fromUChars(cvrtr,
                                         gFileLines[line].unixName, // ptr to target buffer.
                                         sizeNeeded+1, // length of target buffer.
                                         gFileLines[line].name,
                                         -1,           //  source is null terminated
                                         &status);
        if (U_FAILURE(status)) {
            fprintf(stderr, "ICU Conversion Failed.: %d\n", status);
            exit(-1);
        }
        gFileLines[line].unixName[sizeNeeded] = 0;
    }
    ucnv_close(cvrtr);
#endif
}
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------------------
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  class UCharFile   Class to hide all the gorp to read a file in
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                    and produce a stream of UChars.
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------------------
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UCharFile {
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCharFile(const char *fileName);
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ~UCharFile();
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar   get();
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool   eof() {return fEof;};
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool   error() {return fError;};
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t size() { return fFileSize; };
476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCharFile (const UCharFile &other) {};                         // No copy constructor.
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCharFile & operator = (const UCharFile &other) {return *this;};   // No assignment op
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    FILE         *fFile;
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char   *fName;
483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool        fEof;
484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool        fError;
485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar        fPending2ndSurrogate;
486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t      fFileSize;
487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    enum {UTF16LE, UTF16BE, UTF8} fEncoding;
489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUCharFile::UCharFile(const char * fileName) {
492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fEof                 = FALSE;
493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fError               = FALSE;
494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fName                = fileName;
495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    struct stat buf;
496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t result = stat(fileName, &buf);
497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(result != 0) {
498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      fprintf(stderr, "Error getting info\n");
499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      fFileSize = -1;
500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      fFileSize = buf.st_size;
502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fFile                = fopen(fName, "rb");
504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fPending2ndSurrogate = 0;
505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fFile == NULL) {
506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fprintf(stderr, "Can not open file \"%s\"\n", opt_fName);
507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fError = TRUE;
508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //  Look for the byte order mark at the start of the file.
512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int BOMC1, BOMC2, BOMC3;
514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    BOMC1 = fgetc(fFile);
515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    BOMC2 = fgetc(fFile);
516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (BOMC1 == 0xff && BOMC2 == 0xfe) {
518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fEncoding = UTF16LE; }
519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else if (BOMC1 == 0xfe && BOMC2 == 0xff) {
520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fEncoding = UTF16BE; }
521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) {
522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fEncoding = UTF8; }
523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else
524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fprintf(stderr, "collperf:  file \"%s\" encoding must be UTF-8 or UTF-16, and "
526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            "must include a BOM.\n", fileName);
527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fError = true;
528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUCharFile::~UCharFile() {
534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fclose(fFile);
535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar UCharFile::get() {
540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar   c;
541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    switch (fEncoding) {
542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UTF16LE:
543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int  cL, cH;
545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cL = fgetc(fFile);
546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cH = fgetc(fFile);
547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c  = cL  | (cH << 8);
548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (cH == EOF) {
549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c   = 0;
550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fEof = TRUE;
551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UTF16BE:
555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int  cL, cH;
557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cH = fgetc(fFile);
558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cL = fgetc(fFile);
559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c  = cL  | (cH << 8);
560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (cL == EOF) {
561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c   = 0;
562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fEof = TRUE;
563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case UTF8:
567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (fPending2ndSurrogate != 0) {
569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c = fPending2ndSurrogate;
570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fPending2ndSurrogate = 0;
571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int ch = fgetc(fFile);   // Note:  c and ch are separate cause eof test doesn't work on UChar type.
575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (ch == EOF) {
576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c = 0;
577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fEof = TRUE;
578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (ch <= 0x7f) {
582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // It's ascii.  No further utf-8 conversion.
583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c = ch;
584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // Figure out the lenght of the char and read the rest of the bytes
588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            //   into a temp array.
589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int nBytes;
590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (ch >= 0xF0) {nBytes=4;}
591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            else if (ch >= 0xE0) {nBytes=3;}
592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            else if (ch >= 0xC0) {nBytes=2;}
593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            else {
594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fprintf(stderr, "not likely utf-8 encoded file %s contains corrupt data at offset %d.\n", fName, ftell(fFile));
595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fError = TRUE;
596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return 0;
597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            unsigned char  bytes[10];
600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            bytes[0] = (unsigned char)ch;
601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int i;
602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            for (i=1; i<nBytes; i++) {
603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                bytes[i] = fgetc(fFile);
604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (bytes[i] < 0x80 || bytes[i] >= 0xc0) {
605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    fprintf(stderr, "utf-8 encoded file %s contains corrupt data at offset %d. Expected %d bytes, byte %d is invalid. First byte is %02X\n", fName, ftell(fFile), nBytes, i, ch);
606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    fError = TRUE;
607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    return 0;
608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // Convert the bytes from the temp array to a Unicode char.
612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            i = 0;
613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uint32_t  cp;
61454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            U8_NEXT_UNSAFE(bytes, i, cp);
615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c = (UChar)cp;
616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (cp >= 0x10000) {
618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // The code point needs to be broken up into a utf-16 surrogate pair.
619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                //  Process first half this time through the main loop, and
620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                //   remember the other half for the next time through.
621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                UChar utf16Buf[3];
622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                i = 0;
623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp);
624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fPending2ndSurrogate = utf16Buf[1];
625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c = utf16Buf[0];
626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        };
629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return c;
631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------------------
635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//    Main   --  process command line, read in and pre-process the test file,
637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                 call other functions to do the actual tests.
638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//----------------------------------------------------------------------------------------
640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint main(int argc, const char** argv) {
641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (ProcessOptions(argc, argv, opts) != TRUE || opt_help || opt_fName == 0) {
642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf(gUsageString);
643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        exit (1);
644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Make sure that we've only got one API selected.
646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (opt_mac || opt_unix || opt_win) opt_icu = FALSE;
647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (opt_mac || opt_unix) opt_win = FALSE;
648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (opt_mac) opt_unix = FALSE;
649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode          status = U_ZERO_ERROR;
651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //  Set up a Windows LCID
656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  /*
658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (opt_langid != 0) {
659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT);
660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else {
662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        gWinLCID = uloc_getLCID(opt_locale);
663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  */
665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //  Set the UNIX locale
668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (opt_unix) {
670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (setlocale(LC_ALL, opt_locale) == 0) {
671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale);
672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            exit(-1);
673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Read in the input file.
677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //   UCharFile delivers the contents as UTF-16 code units.
678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //   All code units go into one heap buffer (text), which is grown as needed.
679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //   The buffer is not split into lines and is not null terminated; textSize holds its length.
680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCharFile f(opt_fName);
683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (f.error()) {
684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        exit(-1);
685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t fileSize = f.size();
687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const int STARTSIZE = 70000;
688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t bufSize = 0;
689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t charCount = 0;
690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(fileSize != -1) {
691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      text = (UChar *)malloc(fileSize*sizeof(UChar));
692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      bufSize = fileSize;
693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      text = (UChar *)malloc(STARTSIZE*sizeof(UChar));
695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      bufSize = STARTSIZE;
696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(text == NULL) {
698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      fprintf(stderr, "Allocating buffer failed\n");
699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      exit(-1);
700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //  Read the whole file into the text buffer, doubling the buffer whenever it fills.
704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //  Loop runs once per utf-16 value from the input file.
705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //    (The number of bytes read from file per loop iteration depends on external encoding.)
706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (;;) {
707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar c = f.get();
709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(f.eof()) {
710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          break;
711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (f.error()){
713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          exit(-1);
714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // We now have a good UTF-16 value in c.
716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        text[charCount++] = c;
717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(charCount == bufSize) {
718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          text = (UChar *)realloc(text, 2*bufSize*sizeof(UChar));
719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          if(text == NULL) {
720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fprintf(stderr, "Reallocating buffer failed\n");
721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            exit(-1);
722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          }
723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          bufSize *= 2;
724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (opt_terse == FALSE) {
729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("file \"%s\", %d charCount code units.\n", opt_fName, charCount);
730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    textSize = charCount;
733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //  Dump file contents if requested.
739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (opt_dump) {
741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      // dump file, etc... possibly
742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //  We've got the file read into memory.  Go do something with it.
747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i = 0;
749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i = 0; i < opt_passesCount; i++) {
750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      if(opt_loopCount != 0) {
751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(opt_next) {
752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          doForwardTest();
753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(opt_isBound) {
754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          doIsBoundTest();
755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          doForwardTest();
757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      } else if(opt_time != 0) {
759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      }
761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if(text != NULL) {
764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    free(text);
765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(brkit != NULL) {
767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      delete brkit;
768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0;
771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
772