1#!/usr/bin/env perl 
2
3#-----------------------------------------------------------------
4# Quick and dirty script to summarize build information for a
5# set of nightly runs.
6#
7# The results of the nighly regression runs are extracted from 
8# the GMANE mail archive. The URL for a given mail sent to the
9# valgrind-developers mailing list is
10#
11#   http://article.gmane.org/gmane.comp.debugging.valgrind.devel/<integer>
12#
13# The script extracts information about the regression run from a
14# block of information at the beginning of the mail. That information 
15# was added beginning October 4, 2011. Therefore, only regression runs
16# from that date or later can be analyzed.
17#
18# There is unfortunately no good way of figuring out the interval
19# of integers in the above URL that include all nightly regression
20# runs.
21#
22# The function get_regtest_data does all the work. It returns a hash
23# whose keys are the dates at which nightly runs took place. The value
24# is in turn a hash.
25#
26# Each such hash has the following keys:
27#   "builds"                 array of hashes
28#   "num_builds"             int
29#   "num_failing_builds"     int
30#   "num_passing_builds"     int
31#   "num_testcase_failures"  int
32#   "num_failing_testcases"  int
33#   "failure_frequency"      hash indexed by testcase name; value = int
34# 
35# "builds" is an array of hashes with the following keys
36#   "arch"                   string (architecture)
37#   "distro"                 string (distribution, e.g. Fedora-15)
38#   "failures"               array of strings (failing testcases)
39#   "valgrind revision"      integer
40#   "VEX revision"           integer
41#   "GCC version"            string
42#   "C library"              string
43#   "uname -mrs"             string
44#   "Vendor version"         string
45# 
46#-----------------------------------------------------------------
47use strict;
48use warnings; 
49
50use LWP::Simple;
51use Getopt::Long;
52
53my $prog_name = "nightly-build-summary";
54
55my $debug = 0;
56my $keep  = 0;
57
58my $usage=<<EOF;
59USAGE
60
61  $prog_name
62
63     --from=INTEGER    beginning of mail interval; > 14800
64
65    [--to=INTEGER]     end of mail interval; default = from + 100
66
67    [--debug]          verbose mode (debugging)
68
69    [--keep]           write individual emails to files (debugging)
70
71    [--dump]           write results suitable for post-processing
72
73    [--readable]       write results in human readable form (default)
74
75EOF
76
77
78#-----------------------------------------------------------------
79# Search for a line indicating that this is an email containing
80# the results of a valgrind regression run.
81# Return 1, if found and 0 oherwise.
82#-----------------------------------------------------------------
83sub is_regtest_result {
84    my (@lines) = @_;
85
86    foreach my $line (@lines) {
87        return 1 if ($line =~ "^valgrind revision:");
88    }
89
90    return 0;
91}
92
93
94#-----------------------------------------------------------------
95# Extract information from the run. Don't prep the data here. This
96# is done later on.
97#-----------------------------------------------------------------
98sub get_raw_data {
99    my (@lines, $msgno) = @_;
100    my ($i, $n, $line, $date);
101
102    $n = scalar @lines;
103
104    my %hash = ();
105
106# 1) Locate the section with the info about the environment of this nightly run
107    for ($i = 0; $i < $n; ++$i) {
108        last if ($lines[$i] =~ /^valgrind revision:/);
109    }
110    die "no info block in message $msgno" if ($i == $n);
111
112# 2) Read the info about the build: compiler, valgrind revision etc.
113#    and put it into a hash.
114    for ( ; $i < $n; ++$i) {
115        $line = $lines[$i];
116        last if ($line =~ /^$/);    # empty line indicates end of section
117        my ($key, $value) = split(/:/, $line);
118        $value =~ s/^[ ]*//;        # removing leading blanks
119        $hash{$key} = $value;
120    }
121
122    if ($debug) {
123        foreach my $key (keys %hash) {
124            my ($val) = $hash{$key};
125            print "regtest env: KEY = |$key|  VAL = |$val|\n";
126        }
127    }
128
129# 3) Get the date from when the build was kicked off.
130    for ( ; $i < $n; ++$i) {
131        $line = $lines[$i];
132
133        if ($line =~ /^Started at[ ]+([^ ]+)/) {
134            $date = $1;
135            print "DATE = $date\n";
136            last;
137        }
138    }
139    die "no date found in message $msgno" if ($i == $n);
140
141
142# 4) Find out if the regression run failed or passed
143    $hash{"failures"} = [];
144    for ($i = $i + 1; $i < $n; ++$i) {
145        $line = $lines[$i];
146        if ($line =~ /Running regression tests/) {
147            return %hash if ($line =~ /done$/);   # regtest succeeded; no failures
148            die "cannot determine regtest outcome for message $msgno"
149                if (! ($line =~ /failed$/));
150            last;
151        }
152    }
153
154# 5) Regtest failed; locate the section with the list of failing testcases
155    for ($i = $i + 1; $i < $n; ++$i) {
156        $line = $lines[$i];
157# Match for end-of-line == because line might be split.
158        last if ($line =~ /==$/);
159    }
160    die "cannot locate failing testcases in message $msgno" if ($i == $n);
161
162# 6) Get list of failing testcases
163    for ($i = $i + 1; $i < $n; ++$i) {
164        $line = $lines[$i];
165
166        last if ($line =~ /^$/);
167
168        my ($testcase) = (split(/\s+/, $line))[0];
169        print "ADD failing testcase $testcase\n" if ($debug);
170        push @{$hash{"failures"}}, $testcase;
171    }
172
173    return ($date, %hash);
174}
175
176
177#-----------------------------------------------------------------
178# Extract architecture; get a pretty name for the distro
179#-----------------------------------------------------------------
180sub prep_regtest_data {
181    my (%hash) = @_;
182    my ($val, $arch, $distro);
183
184    $val = $hash{"uname -mrs"};
185    die "uname -mrs info is missing" if (! defined $val);
186    $arch = (split(/ /, $val))[2];
187
188    $val = $hash{"Vendor version"};
189    die "Vendor version info is missing" if (! defined $val);
190
191    if ($val =~ /Fedora release ([0-9]+)/) {
192        $distro = "Fedora-$1";
193    } elsif ($val =~ /openSUSE ([0-9]+)\.([0-9]+)/) {
194        $distro = "openSUSE-$1.$2";
195    } elsif ($val =~ /SUSE Linux Enterprise Server 11 SP1/) {
196        $distro = "SLES-11-SP1";
197    } elsif ($val =~ /Red Hat Enterprise Linux AS release 4/) {
198        $distro = "RHEL-4";
199    } else {
200        $distro = "UNKNOWN";
201    }
202
203# Add architecture and distribution to hash
204    $hash{"arch"}   = $arch;
205    $hash{"distro"} = $distro;
206
207    return %hash;
208}
209
210
211#-----------------------------------------------------------------
212# Precompute some summary information and record it
213#-----------------------------------------------------------------
214sub precompute_summary_info
215{
216    my (%dates) = @_;
217
218    foreach my $date (sort keys %dates) {
219        my %failure_frequency = ();
220
221        my %nightly = %{ $dates{$date} };
222        my @builds  = @{ $nightly{"builds"} };
223
224        $nightly{"num_builds"} = scalar (@builds);
225        $nightly{"num_failing_builds"} = 0;
226        $nightly{"num_testcase_failures"} = 0;
227
228        foreach my $build (@builds) {
229            my %regtest_data   = %{ $build };
230
231            my @failures = @{ $regtest_data{"failures"} };
232            my $num_fail = scalar (@failures);
233
234            ++$nightly{"num_failing_builds"} if ($num_fail != 0);
235            $nightly{"num_testcase_failures"} += $num_fail;
236
237# Compute how often a testcase failed
238            foreach my $test ( @failures ) {
239                if (defined $failure_frequency{$test}) {
240                    ++$failure_frequency{$test};
241                } else {
242                    $failure_frequency{$test} = 1;
243                }
244            }
245        }
246
247        $nightly{"num_passing_builds"} = 
248            $nightly{"num_builds"} - $nightly{"num_failing_builds"};
249
250        $nightly{"num_failing_testcases"} = scalar (keys %failure_frequency);
251
252        $nightly{"failure_frequency"} = { %failure_frequency };
253
254        $dates{$date} = { %nightly };
255    }
256
257    return %dates;
258}
259
260
261#-----------------------------------------------------------------
262# Get messages from GMANE, and build up a database of results.
263#-----------------------------------------------------------------
264sub get_regtest_data {
265    my ($from, $to) = @_;
266
267    my $url_base = "http://article.gmane.org/gmane.comp.debugging.valgrind.devel/";
268
269    my %dates = ();
270
271    my $old_date = "-1";
272    my @builds = ();
273
274    for (my $i = $from; $i <= $to; ++$i) {
275        my $url = "$url_base" . "$i";
276
277	my $page = get("$url");
278
279        if ($keep) {
280            open (EMAIL, ">$i");
281            print EMAIL  $page;
282            close(EMAIL);
283        }
284
285# Detect if the article does not exist. Happens for too large --to= values 
286        last if ($page eq "No such file.\n");
287
288# Split the page into lines
289        my @lines = split(/\n/, $page);
290
291# Check whether it contains a regression test result
292        next if (! is_regtest_result(@lines));
293        print "message $i is a regression test result\n" if ($debug);
294
295# Get the raw data
296        my ($date, %regtest_data) = get_raw_data(@lines);
297
298        %regtest_data = prep_regtest_data(%regtest_data);
299
300        if ($date ne $old_date) {
301            my %nightly = ();
302            $nightly{"builds"} = [ @builds ];
303            $dates{$old_date} = { %nightly } if ($old_date ne "-1");
304
305            $old_date = $date;
306            @builds = ();
307        }
308
309        push @builds, { %regtest_data };
310    }
311    my %nightly = ();
312    $nightly{"builds"} = [ @builds ];
313    $dates{$old_date} = { %nightly } if ($old_date ne "-1");
314
315# Convenience: precompute some info we'll be interested in
316    %dates = precompute_summary_info( %dates );
317
318    return %dates;
319}
320
321
322#-----------------------------------------------------------------
323# Write out the results in a form suitable for automatic post-processing
324#-----------------------------------------------------------------
325sub dump_results {
326    my (%dates) = @_;
327
328    foreach my $date (sort keys %dates) {
329
330        my %nightly = %{ $dates{$date} };
331        my @builds  = @{ $nightly{"builds"} };
332
333        foreach my $build (@builds) {
334            my %regtest_data   = %{ $build };
335
336            my $arch     = $regtest_data{"arch"};
337            my $distro   = $regtest_data{"distro"};
338            my @failures = @{ $regtest_data{"failures"} };
339            my $num_fail = scalar (@failures);
340            my $fails    = join(":", sort @failures);
341
342            printf("Regrun: %s  %3d  %-10s %-20s %s\n",
343                   $date, $num_fail, $arch, $distro, $fails);
344        }
345
346        my %failure_frequency = %{ $nightly{"failure_frequency"} };
347
348        foreach my $test (keys %failure_frequency) {
349            printf("Test:   %s  %3d  %s\n",
350                   $date, $failure_frequency{$test}, $test);
351        }
352
353        printf("Total:  %s  builds: %d  %d fail  %d pass  tests: %d fail  %d unique\n",
354               $date, $nightly{"num_builds"}, $nightly{"num_failing_builds"},
355               $nightly{"num_passing_builds"}, $nightly{"num_testcase_failures"},
356               $nightly{"num_failing_testcases"});
357    }
358}
359
360
361sub write_readable_results {
362    my (%dates) = @_;
363
364    foreach my $date (sort keys %dates) {
365        my %nightly = %{ $dates{$date} };
366
367        print "$date\n----------\n";
368
369        printf("%3d builds\n", $nightly{"num_builds"});
370        printf("%3d builds fail\n", $nightly{"num_failing_builds"});
371        printf("%3d builds pass\n", $nightly{"num_passing_builds"});
372        print "\n";
373        printf("%3d testcase failures (across all runs)\n",
374               $nightly{"num_testcase_failures"});
375        printf("%3d failing testcases (unique)\n",
376               $nightly{"num_failing_testcases"});
377        print "\n";
378
379        my @builds  = @{ $nightly{"builds"} };
380
381        if ($nightly{"num_passing_builds"} != 0) {
382            print "Passing builds\n";
383            print "--------------\n";
384            foreach my $build (@builds) {
385                my %regtest_data = %{ $build };
386                my @failures     = @{ $regtest_data{"failures"} };
387                my $num_fail     = scalar (@failures);
388
389                if ($num_fail == 0) {
390                    my $arch   = $regtest_data{"arch"};
391                    my $distro = $regtest_data{"distro"};
392
393                    printf("%-8s %-15s\n", $arch, $distro);
394                }
395                print "\n";
396            }
397            print "\n";
398        }
399
400        if ($nightly{"num_failing_builds"} != 0) {
401            print "Failing builds\n";
402            print "--------------\n";
403            foreach my $build (@builds) {
404                my %regtest_data = %{ $build };
405                my @failures     = @{ $regtest_data{"failures"} };
406                my $num_fail     = scalar (@failures);
407
408                if ($num_fail != 0) {
409                    my $arch     = $regtest_data{"arch"};
410                    my $distro   = $regtest_data{"distro"};
411
412                    printf("%-8s %-15s %d failures\n", $arch, $distro, $num_fail);
413                    foreach my $test (@failures) {
414                        print "         $test\n";
415                    }
416                    print "\n";
417                }
418            }
419            print "\n";
420        }
421
422        print "Failing testcases and their frequency\n";
423        print "-------------------------------------\n";
424        my %failure_frequency = %{ $nightly{"failure_frequency"} };
425
426# Sorted in decreasing frequency
427        foreach my $test (sort {$failure_frequency{$b} cmp $failure_frequency{$a} }
428                          keys %failure_frequency) {
429            printf("%3d  %s\n", $failure_frequency{$test}, $test);
430        }
431        print "\n";
432    }
433}
434
435
436sub main
437{
438    my ($from, $to, $dump, $readable);
439
440    $from = $to = 0;
441    $dump = $readable = 0;
442
443    GetOptions( "from=i"   => \$from,
444                "to=i"     => \$to,
445                "debug"    => \$debug,
446                "dump"     => \$dump,
447                "keep"     => \$keep,
448                "readable" => \$readable
449        ) || die $usage;
450
451# 14800 is about Oct 4, 2011 which is when we began including information
452# about the environment
453
454    die $usage if ($from < 14800);
455
456    $to = $from + 100 if ($to == 0);
457
458    if ($from > $to) {
459        print STDERR "*** invalid [from,to] interval. Try again\n";
460        die $usage;
461    }
462
463    $readable = 1 if ($dump == 0 && $readable == 0);
464
465    print "check message interval [$from...$to]\n" if ($debug);
466
467# Get mails from GMANE mail archive
468
469    my %dates = get_regtest_data($from, $to);
470
471    dump_results(%dates) if ($dump);
472
473    write_readable_results(%dates) if ($readable);
474}
475
476main();
477
478exit 0;
479