#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot like environment.
"""

import json
import os
import plistlib

#

class multidict:
    """
    multidict - A mapping in which assigning to a key appends to that key's
    list of values instead of replacing the previous value.
    """

    def __init__(self, elts=()):
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Every key maps to a list; the first assignment creates it.
        self.data.setdefault(key, []).append(value)

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)

#

class CmpOptions:
    """
    CmpOptions - Options for a comparison: 'root' is the prefix stripped from
    source file names, 'verboseLog' is the path of an optional auxiliary log.
    """

    def __init__(self, verboseLog=None, root=""):
        self.root = root
        self.verboseLog = verboseLog

class AnalysisReport:
    """
    AnalysisReport - One loaded report: the run it belongs to and the list of
    files its diagnostics index into.
    """

    def __init__(self, run, files):
        self.run = run
        self.files = files

class AnalysisDiagnostic:
    """
    AnalysisDiagnostic - One diagnostic: its data dictionary, the report it
    came from, and the name of its HTML report (or None).
    """

    def __init__(self, data, report, htmlReport):
        self.data = data
        self.report = report
        self.htmlReport = htmlReport

    def getReadableName(self):
        """Return the diagnostic location formatted as 'file:line:col'."""
        loc = self.data['location']
        # loc['file'] is an index into the owning report's file list.
        filename = self.report.run.getSourceName(self.report.files[loc['file']])
        line = loc['line']
        column = loc['col']

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        return '%s:%d:%d' % (filename, line, column)

    def getReportData(self):
        """
        Return the raw contents of the HTML report, or a placeholder message
        when the diagnostic has no HTML report.
        """
        if self.htmlReport is None:
            return "This diagnostic does not have any report data."

        # Close the handle deterministically instead of leaking it.
        reportPath = os.path.join(self.report.run.path, self.htmlReport)
        with open(reportPath, "rb") as f:
            return f.read()

class AnalysisRun:
    """
    AnalysisRun - The results found in one directory: the loaded reports and
    the flattened list of all of their diagnostics.
    """

    def __init__(self, path, opts):
        self.path = path
        self.reports = []
        self.diagnostics = []
        self.opts = opts

    def getSourceName(self, path):
        """Return 'path' with the configured root prefix removed, if present."""
        if path.startswith(self.opts.root):
            return path[len(self.opts.root):]
        return path

def _readPlist(path):
    """Parse the plist file at 'path' with whichever plistlib API exists."""
    if hasattr(plistlib, 'load'):
        with open(path, 'rb') as f:
            return plistlib.load(f)
    # Older interpreters only provide readPlist.
    return plistlib.readPlist(path)

def _diagnosticKey(diagnostic):
    """
    Return a string that is equal for diagnostics with equal data and that
    gives diagnostics a total order. Dictionaries themselves cannot be ordered
    on Python 3, so the data is serialized with sorted keys instead; values
    JSON cannot encode are represented by their repr().
    """
    return json.dumps(diagnostic.data, sort_keys=True, default=repr)

def _writeLog(auxLog, text):
    """Write 'text' and a newline to the binary-mode log file 'auxLog'."""
    line = text + "\n"
    # The log is opened in binary mode; only encode when the line is not
    # already a byte string (it is one on Python 2).
    if not isinstance(line, bytes):
        line = line.encode("utf-8")
    auxLog.write(line)

def loadResults(path, opts, deleteEmpty=True):
    """
    loadResults - Load every 'report*plist' file in the directory 'path' and
    return the resulting AnalysisRun.

    Reports with an empty file list are skipped, and also removed from disk
    when 'deleteEmpty' is true.
    """
    run = AnalysisRun(path, opts)

    for f in os.listdir(path):
        if (not f.startswith('report') or
            not f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        data = _readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. The first diagnostic decides
        # for the whole report; a report without diagnostics has none.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Nothing but 'files' and 'diagnostics' is expected in a report.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run

def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).
    """

    res = []

    # Quickly eliminate equal elements. Both runs are sorted by the same key
    # and merged from the largest key downwards.
    neqA = []
    neqB = []
    eltsA = sorted(((_diagnosticKey(d), d) for d in A.diagnostics),
                   key=lambda pair: pair[0])
    eltsB = sorted(((_diagnosticKey(d), d) for d in B.diagnostics),
                   key=lambda pair: pair[0])
    while eltsA and eltsB:
        keyA, a = eltsA[-1]
        keyB, b = eltsB[-1]
        if keyA == keyB:
            res.append((a, b, 0))
            eltsA.pop()
            eltsB.pop()
        elif keyA > keyB:
            # Nothing left in B is as large as a, so a has no match.
            neqA.append(a)
            eltsA.pop()
        else:
            neqB.append(b)
            eltsB.pop()
    neqA.extend(d for _, d in eltsA)
    neqB.extend(d for _, d in eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res

def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
    """
    cmpScanBuildResults - Compare the results in directories dirA and dirB,
    print the differences, and return True if any difference was found.

    When opts.verboseLog is set, a record including the report data is also
    written to that file for every difference.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, deleteEmpty)
    resultsB = loadResults(dirB, opts, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = False
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs = True
                if auxLog:
                    _writeLog(auxLog,
                              "('ADDED', %r, %r)" % (b.getReadableName(),
                                                     b.getReportData()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs = True
                if auxLog:
                    _writeLog(auxLog,
                              "('REMOVED', %r, %r)" % (a.getReadableName(),
                                                       a.getReportData()))
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs = True
                if auxLog:
                    _writeLog(auxLog,
                              "('CHANGED', %r, %r, %r, %r)"
                              % (a.getReadableName(),
                                 b.getReadableName(),
                                 a.getReportData(),
                                 b.getReportData()))
            # Otherwise the diagnostics are identical; nothing to report.

        print("TOTAL REPORTS: %r" % len(resultsB.diagnostics))
        if auxLog:
            _writeLog(auxLog,
                      "('TOTAL REPORTS', %r)" % len(resultsB.diagnostics))
    finally:
        # Make sure the log is flushed and closed even if reporting fails.
        if auxLog:
            auxLog.close()

    return foundDiffs

def main():
    """Parse the command line and compare the two given result directories."""
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--root", dest="root",
                      help="Prefix to ignore on source files",
                      action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    cmpScanBuildResults(dirA, dirB, opts)

if __name__ == '__main__':
    main()