filter_xml revision 436e89c602e787e7a27dd6624b09beed41a0da8a
1#!/usr/bin/env perl
2
3#---------------------------------------------------------------------
4# Quick and dirty program to filter helgrind's XML output.
5# 
6# The script works line-by-line and is generally unaware of XML structure
7# and does not bother with issues of well-formedness.
8#
9# Consists of two parts
10# (1) Global match and replace (see PATTERNS below)
11# (2) Removal of stack frames
12#     Stack frames whose associated file name does not match any name in
13#     TOOL_FILES or in the list of files given on the command line
14#     will be discarded. For a sequence of one or more discarded frames
15#     a line   <frame>...</frame> will be inserted.
16#
17#---------------------------------------------------------------------
18
19use warnings;
20use strict;
21
22#---------------------------------------------------------------------
23# A list of files specific to the tool at hand. Line numbers in
24# these files will be removed from stack frames matching these files.
25#---------------------------------------------------------------------
26my @tool_files = ( "hg_intercepts.c", "vg_replace_malloc.c" );
27
28# List of patterns and replacement strings. 
29# Each pattern must identify a substring which will be replaced.
30my %patterns = (
31    "<pid>(.*)</pid>"       => "...",
32    "<ppid>(.*)</ppid>"     => "...",
33    "<time>(.*)</time>"     => "...",
34    "<obj>(.*)</obj>"       => "...",
35    "<dir>(.*)</dir>"       => "...",
36    "<exe>(.*)</exe>"       => "...",
37    "<tid>(.*)</tid>"       => "...",
38    "<unique>(.*)</unique>" => "...",
39    "thread #([0-9]+)"      => "x",
40    "0x([0-9a-zA-Z]+)"      => "........",
41    "Using Valgrind-([^\\s]*)"    => "X.Y.X",
42    "Copyright \\(C\\) ([0-9]{4}-[0-9]{4}).*" => "XXXX-YYYY"
43);
44
45# List of XML sections to be ignored.
46my %ignore_sections = (
47    "<errorcounts>" => "</errorcounts>",
48    "<suppcounts>"  => "</suppcounts>"
49);
50
51
52# If FILE matches any of the FILES return 1
53sub file_matches ($$) {
54    my ($file, $files) = @_;
55    my ($string, $qstring);
56
57    foreach $string (@$files) {
58        $qstring = quotemeta($string);
59        return 1 if ($file =~ /$qstring/);
60    }
61
62    return 0;
63}
64
65
66my $frame_buf = "";
67my ($file, $lineno, $in_frame, $keep_frame, $num_discarded, $ignore_line);
68
69$in_frame = $keep_frame = $num_discarded = $ignore_line = 0;
70
71line: 
72while (<STDIN>) {
73    my $line = $_;
74    chomp($line);
75
76# Check whether we're ignoring this piece of XML..
77    if ($ignore_line) {
78        foreach my $tag (keys %ignore_sections) {
79            if ($line =~ $ignore_sections{$tag}) {
80                print "$tag...$ignore_sections{$tag}\n";
81                $ignore_line = 0;
82                next line;
83            }
84        }
85    } else {
86        foreach my $tag (keys %ignore_sections) {
87            if ($line =~ $tag) {
88                $ignore_line = 1;
89            }
90        }
91    }
92
93    next if ($ignore_line);
94
95# OK. This line is not to be ignored.
96
97# Massage line by applying PATTERNS.
98    foreach my $key (keys %patterns) {
99        if ($line =~ $key) {
100                $line =~ s/$1/$patterns{$key}/g;
101        }
102    }
103
104# Handle frames
105    if ($in_frame) {
106        if ($line =~ /<\/frame>/) {
107            $frame_buf .= "$line\n";
108# The end of a frame
109            if ($keep_frame) {
110# First: If there were any preceding frames that were discarded
111#        print <frame>...</frame>
112                if ($num_discarded) {
113                    print "    <frame>...</frame>\n";
114                    $num_discarded = 0;
115                }
116# Secondly: Write out the frame itself
117                print "$frame_buf";
118            } else {
119# We don't want to write this frame
120                ++$num_discarded;
121            }
122            $in_frame = $keep_frame = 0;
123            $file = "";
124        } elsif ($line =~ /<file>(.*)<\/file>/) {
125            $frame_buf .= "$line\n";
126            $file = $1;
127            if (file_matches($file, \@tool_files) ||
128                file_matches($file, \@ARGV)) {
129                $keep_frame = 1;
130            }
131        } elsif ($line =~ /<line>(.*)<\/line>/) {
132# This code assumes that <file> always precedes <line>
133            $lineno = $1;
134            if (file_matches($file, \@tool_files)) {
135                $line =~ s/$1/.../;
136            }
137            $frame_buf .= "$line\n";
138        } else {
139            $frame_buf .= "$line\n";
140        }
141    } else {
142# not within frame
143        if ($line =~ /<\/stack>/) {
144            print "    <frame>...</frame>\n" if ($num_discarded);
145            $num_discarded = 0;
146        }
147        if ($line =~ /<frame>/)  {
148            $in_frame = 1;
149            $frame_buf = "$line\n";
150        } else {
151            print "$line\n";
152        }
153    }
154} 
155
156exit 0;
157