1#!/usr/local/bin/perl
2# *******************************************************************************
3# * Copyright (C) 2002-2007 International Business Machines Corporation and     *
4# * others. All Rights Reserved.                                                *
5# *******************************************************************************
6
7use strict;
8
9# Assume we are running within the icu4j root directory
10use lib 'src/com/ibm/icu/dev/test/perf';
11use Dataset;
12
13#---------------------------------------------------------------------
14# Test class
15my $TESTCLASS = 'com.ibm.icu.dev.test.perf.UnicodeSetPerf';
16
17# Methods to be tested.  Each pair represents a test method and
18# a baseline method which is used for comparison.
19my @METHODS  = (
20                ['UnicodeSetAdd',      'HashSetAdd'],
21                ['UnicodeSetContains', 'HashSetContains'],
22                ['UnicodeSetIterate',  'HashSetIterate']);
23
24# Patterns which define the set of characters used for testing.
25my @PATTERNS = (
26                '[:Lt:]',
27#               '[:Cn:]'
28               );
29
30my $CALIBRATE = 2;  # duration in seconds for initial calibration
31my $DURATION  = 10; # duration in seconds for each pass
32my $NUMPASSES = 4;  # number of passes.  If > 1 then the first pass
33                    # is discarded as a JIT warm-up pass.
34
35my $TABLEATTR = 'BORDER="1" CELLPADDING="4" CELLSPACING="0"';
36
37my $PLUS_MINUS = "±";
38
39if ($NUMPASSES < 3) {
40    die "Need at least 3 passes.  One is discarded (JIT warmup) and need two to have 1 degree of freedom (t distribution).";
41}
42
43my $OUT; # see out()
44
45main();
46
47#---------------------------------------------------------------------
48# ...
49sub main {
50    my $date = localtime;
51    my $title = "ICU4J Performance Test $date";
52
53    my $html = $date;
54    $html =~ s/://g; # ':' illegal
55    $html =~ s/\s*\d+$//; # delete year
56    $html =~ s/^\w+\s*//; # delete dow
57    $html = "perf $html.html";
58
59    open(HTML,">$html") or die "Can't write to $html: $!";
60
61    print HTML <<EOF;
62<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
63   "http://www.w3.org/TR/html4/strict.dtd">
64<HTML>
65   <HEAD>
66      <TITLE>$title</TITLE>
67   </HEAD>
68   <BODY>
69EOF
70    print HTML "<H1>$title</H1>\n";
71
72    print HTML "<H2>$TESTCLASS</H2>\n";
73
74    my $raw = "";
75
76    for my $methodPair (@METHODS) {
77
78        my $testMethod = $methodPair->[0];
79        my $baselineMethod = $methodPair->[1];
80
81        print HTML "<P><TABLE $TABLEATTR><TR><TD>\n";
82        print HTML "<P><B>$testMethod vs. $baselineMethod</B></P>\n";
83
84        print HTML "<P><TABLE $TABLEATTR BGCOLOR=\"#CCFFFF\">\n";
85        print HTML "<TR><TD>Pattern</TD><TD>$testMethod</TD>";
86        print HTML "<TD>$baselineMethod</TD><TD>Ratio</TD></TR>\n";
87
88        $OUT = '';
89
90        for my $pat (@PATTERNS) {
91            print HTML "<TR><TD>$pat</TD>\n";
92
93            out("<P><TABLE $TABLEATTR WIDTH=\"100%\">");
94
95            # measure the test method
96            out("<TR><TD>");
97            print "\n$testMethod $pat\n";
98            my $t = measure2($testMethod, $pat, -$DURATION);
99            out("</TD></TR>");
100            print HTML "<TD>", formatSeconds(4, $t->getMean(), $t->getError);
101            print HTML "/event</TD>\n";
102
103            # measure baseline method
104            out("<TR><TD>");
105            print "\nBegin $baselineMethod $pat\n";
106            my $b = measure2($baselineMethod, $pat, -$DURATION);
107            out("</TD></TR>");
108            print HTML "<TD>", formatSeconds(4, $b->getMean(), $t->getError);
109            print HTML "/event</TD>\n";
110
111            out("</TABLE></P>");
112
113            # output ratio
114            my $r = $t->divide($b);
115            my $mean = $r->getMean() - 1;
116            my $color = $mean < 0 ? "RED" : "BLACK";
117            print HTML "<TD><B><FONT COLOR=\"$color\">", formatPercent(3, $mean, $r->getError);
118            print HTML "</FONT></B></TD></TR>\n";
119        }
120
121        print HTML "</TABLE></P>\n";
122
123        print HTML "<P>Raw data:</P>\n";
124        print HTML $OUT;
125        print HTML "</TABLE></P>\n";
126    }
127
128    print HTML <<EOF;
129   </BODY>
130</HTML>
131EOF
132    close(HTML) or die "Can't close $html: $!";
133}
134
135#---------------------------------------------------------------------
136# Append text to the global variable $OUT
137sub out {
138    $OUT .= join('', @_);
139}
140
141#---------------------------------------------------------------------
142# Append text to the global variable $OUT
143sub outln {
144    $OUT .= join('', @_) . "\n";
145}
146
147#---------------------------------------------------------------------
148# Measure a given test method with a give test pattern using the
149# global run parameters.
150#
151# @param the method to run
152# @param the pattern defining characters to test
153# @param if >0 then the number of iterations per pass.  If <0 then
154#        (negative of) the number of seconds per pass.
155#
156# @return a Dataset object, scaled by iterations per pass and
157#         events per iteration, to give time per event
158#
159sub measure2 {
160    my @data = measure1(@_);
161    my $iterPerPass = shift(@data);
162    my $eventPerIter = shift(@data);
163
164    shift(@data) if (@data > 1); # discard first run
165
166    my $ds = Dataset->new(@data);
167    $ds->setScale(1.0e-3 / ($iterPerPass * $eventPerIter));
168    $ds;
169}
170
171#---------------------------------------------------------------------
172# Measure a given test method with a give test pattern using the
173# global run parameters.
174#
175# @param the method to run
176# @param the pattern defining characters to test
177# @param if >0 then the number of iterations per pass.  If <0 then
178#        (negative of) the number of seconds per pass.
179#
180# @return array of:
181#         [0] iterations per pass
182#         [1] events per iteration
183#         [2..] ms reported for each pass, in order
184#
185sub measure1 {
186    my $method = shift;
187    my $pat = shift;
188    my $iterCount = shift; # actually might be -seconds/pass
189
190    out("<P>Measuring $method using $pat, ");
191    if ($iterCount > 0) {
192        out("$iterCount iterations/pass, $NUMPASSES passes</P>\n");
193    } else {
194        out(-$iterCount, " seconds/pass, $NUMPASSES passes</P>\n");
195    }
196
197    # is $iterCount actually -seconds/pass?
198    if ($iterCount < 0) {
199
200        # calibrate: estimate ms/iteration
201        print "Calibrating...";
202        my @t = callJava($method, $pat, -$CALIBRATE, 1);
203        print "done.\n";
204
205        my @data = split(/\s+/, $t[0]->[2]);
206        $data[0] *= 1.0e+3;
207
208        my $timePerIter = 1.0e-3 * $data[0] / $data[1];
209
210        # determine iterations/pass
211        $iterCount = int(-$iterCount / $timePerIter + 0.5);
212
213        out("<P>Calibration pass ($CALIBRATE sec): ");
214        out("$data[0] ms, ");
215        out("$data[1] iterations = ");
216        out(formatSeconds(4, $timePerIter), "/iteration<BR>\n");
217    }
218
219    # run passes
220    print "Measuring $iterCount iterations x $NUMPASSES passes...";
221    my @t = callJava($method, $pat, $iterCount, $NUMPASSES);
222    print "done.\n";
223    my @ms = ();
224    my @b; # scratch
225    for my $a (@t) {
226        # $a->[0]: method name, corresponds to $method
227        # $a->[1]: 'begin' data, == $iterCount
228        # $a->[2]: 'end' data, of the form <ms> <loops> <eventsPerIter>
229        # $a->[3...]: gc messages from JVM during pass
230        @b = split(/\s+/, $a->[2]);
231        push(@ms, $b[0] * 1.0e+3);
232    }
233    my $eventsPerIter = $b[2];
234
235    out("Iterations per pass: $iterCount<BR>\n");
236    out("Events per iteration: $eventsPerIter<BR>\n");
237
238    my @ms_str = @ms;
239    $ms_str[0] .= " (discarded)" if (@ms_str > 1);
240    out("Raw times (ms/pass): ", join(", ", @ms_str), "<BR>\n");
241
242    ($iterCount, $eventsPerIter, @ms);
243}
244
245#---------------------------------------------------------------------
246# Invoke java to run $TESTCLASS, passing it the given parameters.
247#
248# @param the method to run
249# @param the number of iterations, or if negative, the duration
250#        in seconds.  If more than on pass is desired, pass in
251#        a string, e.g., "100 100 100".
252# @param the pattern defining characters to test
253#
254# @return an array of results.  Each result is an array REF
255#         describing one pass.  The array REF contains:
256#         ->[0]: The method name as reported
257#         ->[1]: The params on the '= <meth> begin ...' line
258#         ->[2]: The params on the '= <meth> end ...' line
259#         ->[3..]: GC messages from the JVM, if any
260#
261sub callJava {
262    my $method = shift;
263    my $pat = shift;
264    my $n = shift;
265    my $passes = shift;
266
267    my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
268
269    my $cmd = "java -cp classes $TESTCLASS $method $n -p $passes $pat";
270    print "[$cmd]\n"; # for debugging
271    open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
272    my @out;
273    while (<PIPE>) {
274        push(@out, $_);
275    }
276    close(PIPE) or die "Java failed: \"$cmd\"";
277
278    @out = grep(!/^\#/, @out);  # filter out comments
279
280    #print "[", join("\n", @out), "]\n";
281
282    my @results;
283    my $method = '';
284    my $data = [];
285    foreach (@out) {
286        next unless (/\S/);
287
288        if (/^=\s*(\w+)\s*(\w+)\s*(.*)/) {
289            my ($m, $state, $d) = ($1, $2, $3);
290            #print "$_ => [[$m $state $data]]\n";
291            if ($state eq 'begin') {
292                die "$method was begun but not finished" if ($method);
293                $method = $m;
294                push(@$data, $d);
295                push(@$data, ''); # placeholder for end data
296            } elsif ($state eq 'end') {
297                if ($m ne $method) {
298                    die "$method end does not match: $_";
299                }
300                $data->[1] = $d; # insert end data at [1]
301                #print "#$method:", join(";",@$data), "\n";
302                unshift(@$data, $method); # add method to start
303
304                push(@results, $data);
305                $method = '';
306                $data = [];
307            } else {
308                die "Can't parse: $_";
309            }
310        }
311
312        elsif (/^\[/) {
313            if ($method) {
314                push(@$data, $_);
315            } else {
316                # ignore extraneous GC notices
317            }
318        }
319
320        else {
321            die "Can't parse: $_";
322        }
323    }
324
325    die "$method was begun but not finished" if ($method);
326
327    @results;
328}
329
330#|#---------------------------------------------------------------------
331#|# Format a confidence interval, as given by a Dataset.  Output is as
332#|# as follows:
333#|#   241.23 - 241.98 => 241.5 +/- 0.3
334#|#   241.2 - 243.8 => 242 +/- 1
335#|#   211.0 - 241.0 => 226 +/- 15 or? 230 +/- 20
336#|#   220.3 - 234.3 => 227 +/- 7
337#|#   220.3 - 300.3 => 260 +/- 40
338#|#   220.3 - 1000 => 610 +/- 390 or? 600 +/- 400
339#|#   0.022 - 0.024 => 0.023 +/- 0.001
340#|#   0.022 - 0.032 => 0.027 +/- 0.005
341#|#   0.022 - 1.000 => 0.5 +/- 0.5
342#|# In other words, take one significant digit of the error value and
343#|# display the mean to the same precision.
344#|sub formatDataset {
345#|    my $ds = shift;
346#|    my $lower = $ds->getMean() - $ds->getError();
347#|    my $upper = $ds->getMean() + $ds->getError();
348#|    my $scale = 0;
349#|    # Find how many initial digits are the same
350#|    while ($lower < 1 ||
351#|           int($lower) == int($upper)) {
352#|        $lower *= 10;
353#|        $upper *= 10;
354#|        $scale++;
355#|    }
356#|    while ($lower >= 10 &&
357#|           int($lower) == int($upper)) {
358#|        $lower /= 10;
359#|        $upper /= 10;
360#|        $scale--;
361#|    }
362#|}
363
364#---------------------------------------------------------------------
365# Format a number, optionally with a +/- delta, to n significant
366# digits.
367#
368# @param significant digit, a value >= 1
369# @param multiplier
370# @param time in seconds to be formatted
371# @optional delta in seconds
372#
373# @return string of the form "23" or "23 +/- 10".
374#
375sub formatNumber {
376    my $sigdig = shift;
377    my $mult = shift;
378    my $a = shift;
379    my $delta = shift; # may be undef
380
381    my $result = formatSigDig($sigdig, $a*$mult);
382    if (defined($delta)) {
383        my $d = formatSigDig($sigdig, $delta*$mult);
384        # restrict PRECISION of delta to that of main number
385        if ($result =~ /\.(\d+)/) {
386            # TODO make this work for values with all significant
387            # digits to the left of the decimal, e.g., 1234000.
388
389            # TODO the other thing wrong with this is that it
390            # isn't rounding the $delta properly.  Have to put
391            # this logic into formatSigDig().
392            my $x = length($1);
393            $d =~ s/\.(\d{$x})\d+/.$1/;
394        }
395        $result .= " $PLUS_MINUS " . $d;
396    }
397    $result;
398}
399
400#---------------------------------------------------------------------
401# Format a time, optionally with a +/- delta, to n significant
402# digits.
403#
404# @param significant digit, a value >= 1
405# @param time in seconds to be formatted
406# @optional delta in seconds
407#
408# @return string of the form "23 ms" or "23 +/- 10 ms".
409#
410sub formatSeconds {
411    my $sigdig = shift;
412    my $a = shift;
413    my $delta = shift; # may be undef
414
415    my @MULT = (1   , 1e3,  1e6,  1e9);
416    my @SUFF = ('s' , 'ms', 'us', 'ns');
417
418    # Determine our scale
419    my $i = 0;
420    ++$i while ($a*$MULT[$i] < 1 && $i < @MULT);
421
422    formatNumber($sigdig, $MULT[$i], $a, $delta) . ' ' . $SUFF[$i];
423}
424
425#---------------------------------------------------------------------
426# Format a percentage, optionally with a +/- delta, to n significant
427# digits.
428#
429# @param significant digit, a value >= 1
430# @param value to be formatted, as a fraction, e.g. 0.5 for 50%
431# @optional delta, as a fraction
432#
433# @return string of the form "23 %" or "23 +/- 10 %".
434#
435sub formatPercent {
436    my $sigdig = shift;
437    my $a = shift;
438    my $delta = shift; # may be undef
439
440    formatNumber($sigdig, 100, $a, $delta) . ' %';
441}
442
443#---------------------------------------------------------------------
444# Format a number to n significant digits without using exponential
445# notation.
446#
447# @param significant digit, a value >= 1
448# @param number to be formatted
449#
450# @return string of the form "1234" "12.34" or "0.001234".  If
451#         number was negative, prefixed by '-'.
452#
453sub formatSigDig {
454    my $n = shift() - 1;
455    my $a = shift;
456
457    local $_ = sprintf("%.${n}e", $a);
458    my $sign = (s/^-//) ? '-' : '';
459
460    my $a_e;
461    my $result;
462    if (/^(\d)\.(\d+)e([-+]\d+)$/) {
463        my ($d, $dn, $e) = ($1, $2, $3);
464        $a_e = $e;
465        $d .= $dn;
466        $e++;
467        $d .= '0' while ($e > length($d));
468        while ($e < 1) {
469            $e++;
470            $d = '0' . $d;
471        }
472        if ($e == length($d)) {
473            $result = $sign . $d;
474        } else {
475            $result = $sign . substr($d, 0, $e) . '.' . substr($d, $e);
476        }
477    } else {
478        die "Can't parse $_";
479    }
480    $result;
481}
482
483#eof
484