1#!/usr/local/bin/perl
2# * © 2016 and later: Unicode, Inc. and others.
3# * License & terms of use: http://www.unicode.org/copyright.html#License
4# *******************************************************************************
5# * Copyright (C) 2002-2007 International Business Machines Corporation and     *
6# * others. All Rights Reserved.                                                *
7# *******************************************************************************
8
9use strict;
10
11# Assume we are running within the icu4j root directory
12use lib 'src/com/ibm/icu/dev/test/perf';
13use Dataset;
14
15#---------------------------------------------------------------------
16# Test class
17my $TESTCLASS = 'com.ibm.icu.dev.test.perf.UnicodeSetPerf';
18
19# Methods to be tested.  Each pair represents a test method and
20# a baseline method which is used for comparison.
21my @METHODS  = (
22                ['UnicodeSetAdd',      'HashSetAdd'],
23                ['UnicodeSetContains', 'HashSetContains'],
24                ['UnicodeSetIterate',  'HashSetIterate']);
25
26# Patterns which define the set of characters used for testing.
27my @PATTERNS = (
28                '[:Lt:]',
29#               '[:Cn:]'
30               );
31
32my $CALIBRATE = 2;  # duration in seconds for initial calibration
33my $DURATION  = 10; # duration in seconds for each pass
34my $NUMPASSES = 4;  # number of passes.  If > 1 then the first pass
35                    # is discarded as a JIT warm-up pass.
36
37my $TABLEATTR = 'BORDER="1" CELLPADDING="4" CELLSPACING="0"';
38
39my $PLUS_MINUS = "±";
40
41if ($NUMPASSES < 3) {
42    die "Need at least 3 passes.  One is discarded (JIT warmup) and need two to have 1 degree of freedom (t distribution).";
43}
44
45my $OUT; # see out()
46
47main();
48
49#---------------------------------------------------------------------
50# ...
51sub main {
52    my $date = localtime;
53    my $title = "ICU4J Performance Test $date";
54
55    my $html = $date;
56    $html =~ s/://g; # ':' illegal
57    $html =~ s/\s*\d+$//; # delete year
58    $html =~ s/^\w+\s*//; # delete dow
59    $html = "perf $html.html";
60
61    open(HTML,">$html") or die "Can't write to $html: $!";
62
63    print HTML <<EOF;
64<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
65   "http://www.w3.org/TR/html4/strict.dtd">
66<HTML>
67   <HEAD>
68      <TITLE>$title</TITLE>
69   </HEAD>
70   <BODY>
71EOF
72    print HTML "<H1>$title</H1>\n";
73
74    print HTML "<H2>$TESTCLASS</H2>\n";
75
76    my $raw = "";
77
78    for my $methodPair (@METHODS) {
79
80        my $testMethod = $methodPair->[0];
81        my $baselineMethod = $methodPair->[1];
82
83        print HTML "<P><TABLE $TABLEATTR><TR><TD>\n";
84        print HTML "<P><B>$testMethod vs. $baselineMethod</B></P>\n";
85
86        print HTML "<P><TABLE $TABLEATTR BGCOLOR=\"#CCFFFF\">\n";
87        print HTML "<TR><TD>Pattern</TD><TD>$testMethod</TD>";
88        print HTML "<TD>$baselineMethod</TD><TD>Ratio</TD></TR>\n";
89
90        $OUT = '';
91
92        for my $pat (@PATTERNS) {
93            print HTML "<TR><TD>$pat</TD>\n";
94
95            out("<P><TABLE $TABLEATTR WIDTH=\"100%\">");
96
97            # measure the test method
98            out("<TR><TD>");
99            print "\n$testMethod $pat\n";
100            my $t = measure2($testMethod, $pat, -$DURATION);
101            out("</TD></TR>");
102            print HTML "<TD>", formatSeconds(4, $t->getMean(), $t->getError);
103            print HTML "/event</TD>\n";
104
105            # measure baseline method
106            out("<TR><TD>");
107            print "\nBegin $baselineMethod $pat\n";
108            my $b = measure2($baselineMethod, $pat, -$DURATION);
109            out("</TD></TR>");
110            print HTML "<TD>", formatSeconds(4, $b->getMean(), $t->getError);
111            print HTML "/event</TD>\n";
112
113            out("</TABLE></P>");
114
115            # output ratio
116            my $r = $t->divide($b);
117            my $mean = $r->getMean() - 1;
118            my $color = $mean < 0 ? "RED" : "BLACK";
119            print HTML "<TD><B><FONT COLOR=\"$color\">", formatPercent(3, $mean, $r->getError);
120            print HTML "</FONT></B></TD></TR>\n";
121        }
122
123        print HTML "</TABLE></P>\n";
124
125        print HTML "<P>Raw data:</P>\n";
126        print HTML $OUT;
127        print HTML "</TABLE></P>\n";
128    }
129
130    print HTML <<EOF;
131   </BODY>
132</HTML>
133EOF
134    close(HTML) or die "Can't close $html: $!";
135}
136
137#---------------------------------------------------------------------
138# Append text to the global variable $OUT
139sub out {
140    $OUT .= join('', @_);
141}
142
143#---------------------------------------------------------------------
144# Append text to the global variable $OUT
145sub outln {
146    $OUT .= join('', @_) . "\n";
147}
148
149#---------------------------------------------------------------------
150# Measure a given test method with a give test pattern using the
151# global run parameters.
152#
153# @param the method to run
154# @param the pattern defining characters to test
155# @param if >0 then the number of iterations per pass.  If <0 then
156#        (negative of) the number of seconds per pass.
157#
158# @return a Dataset object, scaled by iterations per pass and
159#         events per iteration, to give time per event
160#
161sub measure2 {
162    my @data = measure1(@_);
163    my $iterPerPass = shift(@data);
164    my $eventPerIter = shift(@data);
165
166    shift(@data) if (@data > 1); # discard first run
167
168    my $ds = Dataset->new(@data);
169    $ds->setScale(1.0e-3 / ($iterPerPass * $eventPerIter));
170    $ds;
171}
172
173#---------------------------------------------------------------------
174# Measure a given test method with a give test pattern using the
175# global run parameters.
176#
177# @param the method to run
178# @param the pattern defining characters to test
179# @param if >0 then the number of iterations per pass.  If <0 then
180#        (negative of) the number of seconds per pass.
181#
182# @return array of:
183#         [0] iterations per pass
184#         [1] events per iteration
185#         [2..] ms reported for each pass, in order
186#
187sub measure1 {
188    my $method = shift;
189    my $pat = shift;
190    my $iterCount = shift; # actually might be -seconds/pass
191
192    out("<P>Measuring $method using $pat, ");
193    if ($iterCount > 0) {
194        out("$iterCount iterations/pass, $NUMPASSES passes</P>\n");
195    } else {
196        out(-$iterCount, " seconds/pass, $NUMPASSES passes</P>\n");
197    }
198
199    # is $iterCount actually -seconds/pass?
200    if ($iterCount < 0) {
201
202        # calibrate: estimate ms/iteration
203        print "Calibrating...";
204        my @t = callJava($method, $pat, -$CALIBRATE, 1);
205        print "done.\n";
206
207        my @data = split(/\s+/, $t[0]->[2]);
208        $data[0] *= 1.0e+3;
209
210        my $timePerIter = 1.0e-3 * $data[0] / $data[1];
211
212        # determine iterations/pass
213        $iterCount = int(-$iterCount / $timePerIter + 0.5);
214
215        out("<P>Calibration pass ($CALIBRATE sec): ");
216        out("$data[0] ms, ");
217        out("$data[1] iterations = ");
218        out(formatSeconds(4, $timePerIter), "/iteration<BR>\n");
219    }
220
221    # run passes
222    print "Measuring $iterCount iterations x $NUMPASSES passes...";
223    my @t = callJava($method, $pat, $iterCount, $NUMPASSES);
224    print "done.\n";
225    my @ms = ();
226    my @b; # scratch
227    for my $a (@t) {
228        # $a->[0]: method name, corresponds to $method
229        # $a->[1]: 'begin' data, == $iterCount
230        # $a->[2]: 'end' data, of the form <ms> <loops> <eventsPerIter>
231        # $a->[3...]: gc messages from JVM during pass
232        @b = split(/\s+/, $a->[2]);
233        push(@ms, $b[0] * 1.0e+3);
234    }
235    my $eventsPerIter = $b[2];
236
237    out("Iterations per pass: $iterCount<BR>\n");
238    out("Events per iteration: $eventsPerIter<BR>\n");
239
240    my @ms_str = @ms;
241    $ms_str[0] .= " (discarded)" if (@ms_str > 1);
242    out("Raw times (ms/pass): ", join(", ", @ms_str), "<BR>\n");
243
244    ($iterCount, $eventsPerIter, @ms);
245}
246
247#---------------------------------------------------------------------
248# Invoke java to run $TESTCLASS, passing it the given parameters.
249#
250# @param the method to run
251# @param the number of iterations, or if negative, the duration
252#        in seconds.  If more than on pass is desired, pass in
253#        a string, e.g., "100 100 100".
254# @param the pattern defining characters to test
255#
256# @return an array of results.  Each result is an array REF
257#         describing one pass.  The array REF contains:
258#         ->[0]: The method name as reported
259#         ->[1]: The params on the '= <meth> begin ...' line
260#         ->[2]: The params on the '= <meth> end ...' line
261#         ->[3..]: GC messages from the JVM, if any
262#
263sub callJava {
264    my $method = shift;
265    my $pat = shift;
266    my $n = shift;
267    my $passes = shift;
268
269    my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n;
270
271    my $cmd = "java -cp classes $TESTCLASS $method $n -p $passes $pat";
272    print "[$cmd]\n"; # for debugging
273    open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
274    my @out;
275    while (<PIPE>) {
276        push(@out, $_);
277    }
278    close(PIPE) or die "Java failed: \"$cmd\"";
279
280    @out = grep(!/^\#/, @out);  # filter out comments
281
282    #print "[", join("\n", @out), "]\n";
283
284    my @results;
285    my $method = '';
286    my $data = [];
287    foreach (@out) {
288        next unless (/\S/);
289
290        if (/^=\s*(\w+)\s*(\w+)\s*(.*)/) {
291            my ($m, $state, $d) = ($1, $2, $3);
292            #print "$_ => [[$m $state $data]]\n";
293            if ($state eq 'begin') {
294                die "$method was begun but not finished" if ($method);
295                $method = $m;
296                push(@$data, $d);
297                push(@$data, ''); # placeholder for end data
298            } elsif ($state eq 'end') {
299                if ($m ne $method) {
300                    die "$method end does not match: $_";
301                }
302                $data->[1] = $d; # insert end data at [1]
303                #print "#$method:", join(";",@$data), "\n";
304                unshift(@$data, $method); # add method to start
305
306                push(@results, $data);
307                $method = '';
308                $data = [];
309            } else {
310                die "Can't parse: $_";
311            }
312        }
313
314        elsif (/^\[/) {
315            if ($method) {
316                push(@$data, $_);
317            } else {
318                # ignore extraneous GC notices
319            }
320        }
321
322        else {
323            die "Can't parse: $_";
324        }
325    }
326
327    die "$method was begun but not finished" if ($method);
328
329    @results;
330}
331
332#|#---------------------------------------------------------------------
333#|# Format a confidence interval, as given by a Dataset.  Output is as
334#|# as follows:
335#|#   241.23 - 241.98 => 241.5 +/- 0.3
336#|#   241.2 - 243.8 => 242 +/- 1
337#|#   211.0 - 241.0 => 226 +/- 15 or? 230 +/- 20
338#|#   220.3 - 234.3 => 227 +/- 7
339#|#   220.3 - 300.3 => 260 +/- 40
340#|#   220.3 - 1000 => 610 +/- 390 or? 600 +/- 400
341#|#   0.022 - 0.024 => 0.023 +/- 0.001
342#|#   0.022 - 0.032 => 0.027 +/- 0.005
343#|#   0.022 - 1.000 => 0.5 +/- 0.5
344#|# In other words, take one significant digit of the error value and
345#|# display the mean to the same precision.
346#|sub formatDataset {
347#|    my $ds = shift;
348#|    my $lower = $ds->getMean() - $ds->getError();
349#|    my $upper = $ds->getMean() + $ds->getError();
350#|    my $scale = 0;
351#|    # Find how many initial digits are the same
352#|    while ($lower < 1 ||
353#|           int($lower) == int($upper)) {
354#|        $lower *= 10;
355#|        $upper *= 10;
356#|        $scale++;
357#|    }
358#|    while ($lower >= 10 &&
359#|           int($lower) == int($upper)) {
360#|        $lower /= 10;
361#|        $upper /= 10;
362#|        $scale--;
363#|    }
364#|}
365
366#---------------------------------------------------------------------
367# Format a number, optionally with a +/- delta, to n significant
368# digits.
369#
370# @param significant digit, a value >= 1
371# @param multiplier
372# @param time in seconds to be formatted
373# @optional delta in seconds
374#
375# @return string of the form "23" or "23 +/- 10".
376#
377sub formatNumber {
378    my $sigdig = shift;
379    my $mult = shift;
380    my $a = shift;
381    my $delta = shift; # may be undef
382
383    my $result = formatSigDig($sigdig, $a*$mult);
384    if (defined($delta)) {
385        my $d = formatSigDig($sigdig, $delta*$mult);
386        # restrict PRECISION of delta to that of main number
387        if ($result =~ /\.(\d+)/) {
388            # TODO make this work for values with all significant
389            # digits to the left of the decimal, e.g., 1234000.
390
391            # TODO the other thing wrong with this is that it
392            # isn't rounding the $delta properly.  Have to put
393            # this logic into formatSigDig().
394            my $x = length($1);
395            $d =~ s/\.(\d{$x})\d+/.$1/;
396        }
397        $result .= " $PLUS_MINUS " . $d;
398    }
399    $result;
400}
401
402#---------------------------------------------------------------------
403# Format a time, optionally with a +/- delta, to n significant
404# digits.
405#
406# @param significant digit, a value >= 1
407# @param time in seconds to be formatted
408# @optional delta in seconds
409#
410# @return string of the form "23 ms" or "23 +/- 10 ms".
411#
412sub formatSeconds {
413    my $sigdig = shift;
414    my $a = shift;
415    my $delta = shift; # may be undef
416
417    my @MULT = (1   , 1e3,  1e6,  1e9);
418    my @SUFF = ('s' , 'ms', 'us', 'ns');
419
420    # Determine our scale
421    my $i = 0;
422    ++$i while ($a*$MULT[$i] < 1 && $i < @MULT);
423
424    formatNumber($sigdig, $MULT[$i], $a, $delta) . ' ' . $SUFF[$i];
425}
426
427#---------------------------------------------------------------------
428# Format a percentage, optionally with a +/- delta, to n significant
429# digits.
430#
431# @param significant digit, a value >= 1
432# @param value to be formatted, as a fraction, e.g. 0.5 for 50%
433# @optional delta, as a fraction
434#
435# @return string of the form "23 %" or "23 +/- 10 %".
436#
437sub formatPercent {
438    my $sigdig = shift;
439    my $a = shift;
440    my $delta = shift; # may be undef
441
442    formatNumber($sigdig, 100, $a, $delta) . ' %';
443}
444
445#---------------------------------------------------------------------
446# Format a number to n significant digits without using exponential
447# notation.
448#
449# @param significant digit, a value >= 1
450# @param number to be formatted
451#
452# @return string of the form "1234" "12.34" or "0.001234".  If
453#         number was negative, prefixed by '-'.
454#
455sub formatSigDig {
456    my $n = shift() - 1;
457    my $a = shift;
458
459    local $_ = sprintf("%.${n}e", $a);
460    my $sign = (s/^-//) ? '-' : '';
461
462    my $a_e;
463    my $result;
464    if (/^(\d)\.(\d+)e([-+]\d+)$/) {
465        my ($d, $dn, $e) = ($1, $2, $3);
466        $a_e = $e;
467        $d .= $dn;
468        $e++;
469        $d .= '0' while ($e > length($d));
470        while ($e < 1) {
471            $e++;
472            $d = '0' . $d;
473        }
474        if ($e == length($d)) {
475            $result = $sign . $d;
476        } else {
477            $result = $sign . substr($d, 0, $e) . '.' . substr($d, $e);
478        }
479    } else {
480        die "Can't parse $_";
481    }
482    $result;
483}
484
485#eof
486