1#!/usr/bin/env python
2
3"""
4CmpRuns - A simple tool for comparing two static analyzer runs to determine
5which reports have been added, removed, or changed.
6
7This is designed to support automated testing using the static analyzer, from
8two perspectives:
9  1. To monitor changes in the static analyzer's reports on real code bases, for
10     regression testing.
11
  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.
14"""
15
16import os
17import plistlib
18
19#
20
class multidict:
    """
    multidict - A dictionary-like container which maps each key to the list of
    every value that has been assigned to it.

    Assigning to a key never replaces an earlier value; the new value is
    appended to that key's list instead.
    """

    def __init__(self, elts=()):
        # elts is an iterable of (key, value) pairs; repeated keys accumulate.
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __len__(self):
        # Counts distinct keys, not the total number of stored values.
        return len(self.data)

    def __getitem__(self, item):
        # Gives back the list of values for item (KeyError if it is missing).
        return self.data[item]

    def __setitem__(self, key, value):
        # Create the list the first time a key is seen, then append.
        self.data.setdefault(key, []).append(value)

    def get(self, key, default=None):
        # Like __getitem__, but yields default instead of raising.
        return self.data.get(key, default)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()
44
45#
46
class CmpOptions:
    """
    CmpOptions - Plain holder for the comparison settings.

    verboseLog is the path of an optional log file (None disables it) and root
    is a path prefix to strip from source file names.
    """

    def __init__(self, verboseLog=None, root=""):
        self.verboseLog = verboseLog
        self.root = root
51
class AnalysisReport:
    """
    AnalysisReport - One loaded report: the run it belongs to together with
    the report's list of files.
    """

    def __init__(self, run, files):
        self.files = files
        self.run = run
56
class AnalysisDiagnostic:
    """
    AnalysisDiagnostic - A single diagnostic taken from a report.

    data is the diagnostic's entry from the report, report is the owning
    AnalysisReport, and htmlReport is the name of the matching HTML file
    relative to the run directory (or None when there is none).
    """

    def __init__(self, data, report, htmlReport):
        self.data = data
        self.report = report
        self.htmlReport = htmlReport

    def getReadableName(self):
        """Return the diagnostic's location formatted as 'file:line:col'."""
        loc = self.data['location']
        # The location stores an index into the report's file list; the run
        # then maps that path to its displayed source name.
        filename = self.report.run.getSourceName(self.report.files[loc['file']])
        line = loc['line']
        column = loc['col']

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        return '%s:%d:%d' % (filename, line, column)

    def getReportData(self):
        """
        Return the raw contents of the diagnostic's HTML report, or a fixed
        placeholder message when the diagnostic has no HTML report.
        """
        if self.htmlReport is None:
            return "This diagnostic does not have any report data."

        # Use a context manager so the file handle is released right away
        # instead of being left for the garbage collector to close.
        reportPath = os.path.join(self.report.run.path, self.htmlReport)
        with open(reportPath, "rb") as reportFile:
            return reportFile.read()
80
class AnalysisRun:
    """
    AnalysisRun - The results of one analyzer run: the directory they came
    from, the comparison options, and the reports and diagnostics collected
    so far (both start out empty).
    """

    def __init__(self, path, opts):
        self.opts = opts
        self.path = path
        self.diagnostics = []
        self.reports = []

    def getSourceName(self, path):
        """Return path with the configured root prefix removed, if present."""
        prefix = self.opts.root
        if not path.startswith(prefix):
            return path
        return path[len(prefix):]
92
def loadResults(path, opts, deleteEmpty=True):
    """
    loadResults - Load the analyzer reports found in a results directory.

    Every entry of 'path' whose name starts with 'report' and ends with 'plist'
    is parsed with plistlib. A report whose 'files' entry is empty is skipped
    and, when deleteEmpty is true, also removed from disk.

    Returns an AnalysisRun holding one AnalysisReport per non-empty report and
    one AnalysisDiagnostic per entry in that report's 'diagnostics' list.
    """
    run = AnalysisRun(path, opts)

    # os.listdir returns entries in arbitrary order; sort them so that the
    # order of run.reports and run.diagnostics is reproducible.
    for f in sorted(os.listdir(path)):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        diagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. The first diagnostic decides
        # for the whole report; an empty diagnostics list simply has none.
        if diagnostics and 'HTMLDiagnostics_files' in diagnostics[0]:
            htmlFiles = []
            for d in diagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1, \
                    "expected exactly one HTML file per diagnostic in %r" % (p,)
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(diagnostics)

        report = AnalysisReport(run, data.pop('files'))
        reportDiagnostics = [AnalysisDiagnostic(d, report, h)
                             for d, h in zip(diagnostics, htmlFiles)]

        # Everything understood has been popped; anything left is unexpected.
        assert not data, \
            "unrecognized top-level keys in %r: %r" % (p, sorted(data))

        run.reports.append(report)
        run.diagnostics.extend(reportDiagnostics)

    return run
132
def compareResults(A, B):
    """
    compareResults - Relate the diagnostics of run A to those of run B.

    Returns a list of triples (a, b, confidence). Each of a and b is either a
    diagnostic from the corresponding run or None, and confidence describes
    the quality of the match: 0 means the two diagnostics are equal, and None
    is used whenever one side of the pair is None.

    Matched pairs come first in the result, followed by the diagnostics found
    only in A and then those found only in B.
    """

    # Order both runs by their diagnostic data so equal entries line up. This
    # relies on the data objects supporting ordering comparisons.
    byData = lambda diag: diag.data
    sortedA = sorted(A.diagnostics, key=byData)
    sortedB = sorted(B.diagnostics, key=byData)

    matched = []
    onlyA = []
    onlyB = []

    # Walk both sorted lists from the largest element downwards, pairing up
    # equal entries and setting aside whichever side has no counterpart.
    idxA = len(sortedA) - 1
    idxB = len(sortedB) - 1
    while idxA >= 0 and idxB >= 0:
        a = sortedA[idxA]
        b = sortedB[idxB]
        if a.data == b.data:
            matched.append((a, b, 0))
            idxA -= 1
            idxB -= 1
        elif a.data > b.data:
            # a is larger than everything left in B, so it has no match.
            onlyA.append(a)
            idxA -= 1
        else:
            # Likewise b cannot match anything left in A.
            onlyB.append(b)
            idxB -= 1

    # Whatever one side still holds once the other runs out is unmatched.
    onlyA.extend(sortedA[:idxA + 1])
    onlyB.extend(sortedB[:idxB + 1])

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    removed = [(a, None, None) for a in onlyA]
    added = [(None, b, None) for b in onlyB]
    return matched + removed + added
179
def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
    """
    cmpScanBuildResults - Compare the results in dirA against those in dirB
    and print every added, removed or changed diagnostic, followed by the
    total number of diagnostics in dirB.

    When opts.verboseLog is set, a tuple-like line with the diagnostic names
    and their report data is also written to that file for each difference.
    deleteEmpty is passed through to loadResults for both directories.

    Returns True if any difference was printed, False otherwise.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, deleteEmpty)
    resultsB = loadResults(dirB, opts, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    # The log must be closed (and so flushed) even if reporting fails part
    # way through, hence the try/finally around everything that uses it.
    try:
        foundDiffs = False
        for a, b, confidence in compareResults(resultsA, resultsB):
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs = True
                if auxLog is not None:
                    auxLog.write("('ADDED', %r, %r)" % (b.getReadableName(),
                                                        b.getReportData())
                                 + "\n")
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs = True
                if auxLog is not None:
                    auxLog.write("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                          a.getReportData())
                                 + "\n")
            elif confidence:
                # A non-zero confidence marks an inexact match between a and b.
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs = True
                if auxLog is not None:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReportData(),
                                    b.getReportData())
                                 + "\n")
            # Otherwise the pair is an exact match and nothing is reported.

        print("TOTAL REPORTS: %r" % len(resultsB.diagnostics))
        if auxLog is not None:
            auxLog.write("('TOTAL REPORTS', %r)" % len(resultsB.diagnostics)
                         + "\n")
    finally:
        if auxLog is not None:
            auxLog.close()

    return foundDiffs
225
def main():
    """
    Command line entry point: parse the options, require exactly two
    directory arguments and compare them. The result of the comparison is not
    used here.
    """
    from optparse import OptionParser

    optParser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    optParser.add_option(
        "", "--root",
        action="store", type=str, dest="root", default="",
        help="Prefix to ignore on source files")
    optParser.add_option(
        "", "--verbose-log",
        action="store", type=str, dest="verboseLog", default=None,
        metavar="LOG",
        help="Write additional information to LOG [default=None]")
    opts, positional = optParser.parse_args()

    # Exactly two positional arguments are expected: the two run directories.
    if len(positional) != 2:
        optParser.error("invalid number of arguments")

    dirA, dirB = positional
    cmpScanBuildResults(dirA, dirB, opts)

# Only run the comparison when executed as a script, not when imported.
if __name__ == '__main__':
    main()
247