1#!/usr/bin/env python
2
3"""
4CmpRuns - A simple tool for comparing two static analyzer runs to determine
5which reports have been added, removed, or changed.
6
7This is designed to support automated testing using the static analyzer, from
8two perspectives:
9  1. To monitor changes in the static analyzer's reports on real code bases, for
10     regression testing.
11
12  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.
14"""
15
16import os
17import plistlib
18
19#
20
class multidict:
    """Mapping in which every key holds a list of the values assigned to it.

    Assigning to a key never overwrites: the value is appended to that key's
    list, which is created on first use. Lookups return the whole list.
    """

    def __init__(self, elts=()):
        # 'elts' is an iterable of (key, value) pairs; repeated keys accumulate.
        self.data = {}
        for k, v in elts:
            self[k] = v

    def __getitem__(self, item):
        # Raises KeyError for keys that were never assigned.
        return self.data[item]

    def __setitem__(self, key, value):
        # Append to the existing list, creating it for a new key.
        self.data.setdefault(key, []).append(value)

    def __len__(self):
        # Number of distinct keys, not number of stored values.
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()
44
45#
46
class CmpOptions:
    """Simple holder for the comparison settings.

    Attributes:
      verboseLog -- value stored as given (default None).
      root       -- value stored as given (default "").
    """

    def __init__(self, verboseLog=None, root=""):
        self.verboseLog = verboseLog
        self.root = root
51
class AnalysisReport:
    """One loaded report: the run it belongs to plus its 'files' table."""

    def __init__(self, run, files):
        # Both arguments are stored unchanged.
        self.files = files
        self.run = run
56
class AnalysisDiagnostic:
    """A single diagnostic entry together with its owning report.

    Attributes:
      data       -- the diagnostic mapping; 'location', 'category' and
                    'description' are read from it.
      report     -- the owning report object; its 'files' and 'run' are used.
      htmlReport -- name of the associated HTML report, or None.
    """

    def __init__(self, data, report, htmlReport):
        self.data, self.report, self.htmlReport = data, report, htmlReport

    def getReadableName(self):
        """Return 'file:line:col, category: description' for this diagnostic."""
        location = self.data['location']
        owner = self.report
        # The location stores an index into the report's file table.
        sourcePath = owner.files[location['file']]

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        fields = (owner.run.getSourceName(sourcePath),
                  location['line'],
                  location['col'],
                  self.data['category'],
                  self.data['description'])
        return '%s:%d:%d, %s: %s' % fields

    def getReportData(self):
        """Return the HTML report path under the run directory, or " "."""
        if self.htmlReport is not None:
            # We could also dump the report contents instead of its path:
            # open(os.path.join(self.report.run.path,
            #                   self.htmlReport), "rb").read()
            return os.path.join(self.report.run.path, self.htmlReport)
        return " "
84
class AnalysisRun:
    """Results of one analyzer run: its directory, reports and diagnostics."""

    def __init__(self, path, opts):
        self.path = path
        self.opts = opts
        # Both lists start empty; nothing in this class fills them.
        self.reports = []
        self.diagnostics = []

    def getSourceName(self, path):
        """Return 'path' with the leading opts.root prefix removed, if present."""
        prefix = self.opts.root
        if not path.startswith(prefix):
            return path
        return path[len(prefix):]
96
def loadResults(path, opts, deleteEmpty=True):
    """Load every analyzer report plist in a results directory.

    Only directory entries whose names start with 'report' and end with
    'plist' are read. Each one is expected to contain exactly the keys
    'files' and 'diagnostics'.

    Arguments:
      path        -- directory containing the report plists.
      opts        -- options object handed to AnalysisRun.
      deleteEmpty -- when true, report files whose 'files' list is empty are
                     removed from disk; such reports are skipped either way.

    Returns the AnalysisRun populated with one AnalysisReport per non-empty
    plist and one AnalysisDiagnostic per diagnostic entry.

    Raises AssertionError if a diagnostic does not carry exactly one HTML
    file or if the plist has unexpected top-level keys.
    """
    run = AnalysisRun(path, opts)

    for f in os.listdir(path):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. The first diagnostic
        # decides for the whole report; the emptiness check avoids an
        # IndexError on a report that lists files but no diagnostics.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1, \
                    "expected exactly one HTML file per diagnostic in %r" % p
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Everything in the plist should have been consumed by now.
        assert not data, "unexpected keys in %r: %r" % (p, list(data.keys()))

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
136
def compareResults(A, B):
    """
    compareResults - Relate the diagnostics of run A to those of run B.

    Returns a list of triples (a, b, confidence). Matched pairs come first
    with confidence 0; then diagnostics found only in A as (a, None, None);
    then diagnostics found only in B as (None, b, None).

    Two diagnostics are paired when their 'location' entries are equal; the
    ordering used to walk both runs is that of the full 'data' values.
    """

    def byData(diag):
        return diag.data

    # Work on sorted copies and consume them from the high end.
    pendingA = sorted(A.diagnostics, key=byData)
    pendingB = sorted(B.diagnostics, key=byData)

    matched = []
    onlyA = []
    onlyB = []

    # Quickly eliminate equal elements.
    while pendingA and pendingB:
        a = pendingA[-1]
        b = pendingB[-1]
        if a.data['location'] == b.data['location']:
            pendingA.pop()
            pendingB.pop()
            matched.append((a, b, 0))
        elif a.data > b.data:
            # 'a' sorts above everything left in B, so it has no partner.
            onlyA.append(pendingA.pop())
        else:
            onlyB.append(pendingB.pop())

    # Whatever remains on either side is unmatched.
    onlyA.extend(pendingA)
    onlyB.extend(pendingB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    removed = [(a, None, None) for a in onlyA]
    added = [(None, b, None) for b in onlyB]
    return matched + removed + added
183
def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
    """Compare two result directories and print the differences.

    Arguments:
      dirA, dirB  -- directories of the two runs to compare.
      opts        -- options object; 'verboseLog' (a file name or a false
                     value) is read here, and the object is passed on to
                     loadResults.
      deleteEmpty -- forwarded to loadResults for both directories.

    One line is printed per added, removed or changed diagnostic, followed
    by the totals. When opts.verboseLog is set, matching tuple-formatted
    lines that also carry the report data are written to that file.

    Returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, deleteEmpty)
    resultsB = loadResults(dirB, opts, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    # The try/finally makes sure the log is closed even if reporting fails.
    try:
        foundDiffs = 0
        for a, b, confidence in compareResults(resultsA, resultsB):
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n"
                                 % (b.getReadableName(),
                                    b.getReportData()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n"
                                 % (a.getReadableName(),
                                    a.getReportData()))
            elif confidence:
                # A non-zero confidence marks a pair that is not an exact
                # match.
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)\n"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReportData(),
                                    b.getReportData()))
            # Otherwise the pair matched exactly: nothing to report.

        # The total counts the diagnostics of the second run (dirB).
        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
    finally:
        if auxLog:
            auxLog.close()

    return foundDiffs
232
def main():
    """Command-line entry point: parse the options and compare two runs.

    Expects exactly two positional arguments (the two result directories);
    any other count is reported through the parser's error handler.
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--root",
                      action="store", type=str, dest="root", default="",
                      help="Prefix to ignore on source files")
    parser.add_option("", "--verbose-log",
                      action="store", type=str, dest="verboseLog",
                      default=None, metavar="LOG",
                      help="Write additional information to LOG [default=None]")

    opts, args = parser.parse_args()
    if len(args) != 2:
        parser.error("invalid number of arguments")

    cmpScanBuildResults(args[0], args[1], opts)
251
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
254