#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer
     testing into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    #
    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)

"""

import os
import plistlib


def _loadPlist(path):
    """Parse the plist file at `path` and return its top-level object.

    Uses plistlib.load() when the running interpreter provides it and falls
    back to plistlib.readPlist() otherwise, so the module works on
    interpreters that only offer one of the two entry points.
    """
    loader = getattr(plistlib, 'load', None)
    if loader is None:
        return plistlib.readPlist(path)
    with open(path, 'rb') as plistFile:
        return loader(plistFile)


def _writeLogLine(log, text):
    """Append `text` plus a newline to `log`; do nothing when `log` is None."""
    if log is not None:
        log.write(text + "\n")


# Information about analysis run:
# path - the analysis output directory
# root - the name of the root directory, which will be disregarded when
#        determining the source file name
class SingleRunInfo:
    def __init__(self, path, root="", verboseLog=None):
        self.path = path
        self.root = root
        self.verboseLog = verboseLog


class AnalysisDiagnostic:
    """A single diagnostic entry taken from an analyzer plist report.

    data       - the raw diagnostic dictionary; must contain 'location'
    report     - the AnalysisReport this diagnostic belongs to
    htmlReport - name of the matching HTML report file, or None
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the source file name with the run's root prefix removed."""
        root = self._report.run.root
        # The location stores an index into the report's list of files.
        fileName = self._report.files[self._loc['file']]
        if fileName.startswith(root):
            return fileName[len(root):]
        return fileName

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the key used to match this diagnostic across two runs.

        The key is "<file name>+", followed by "<issue_context>+" and
        "<issue_hash>" for whichever of those entries the diagnostic has.
        """
        identifier = self.getFileName() + "+"
        if 'issue_context' in self._data:
            identifier += self._data['issue_context'] + "+"
        if 'issue_hash' in self._data:
            identifier += str(self._data['issue_hash'])
        return identifier

    def getReport(self):
        """Return the path of the HTML report, or " " when there is none."""
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        """Return "file:line:col, category: description" for this diagnostic."""
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data


class multidict:
    """A mapping in which every key holds the list of values assigned to it.

    Assigning to a key appends to that key's list instead of replacing it.
    """

    def __init__(self, elts=()):
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        if key in self.data:
            self.data[key].append(value)
        else:
            self.data[key] = [value]

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)


class CmpOptions:
    """Options consumed by loadResults() and dumpScanBuildResultsDiff()."""

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.rootA = rootA
        self.rootB = rootB
        self.verboseLog = verboseLog


class AnalysisReport:
    """The contents of one plist file: its file list and its diagnostics."""

    def __init__(self, run, files):
        self.run = run
        self.files = files
        self.diagnostics = []


class AnalysisRun:
    """All reports loaded for one analyzer run described by a SingleRunInfo."""

    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        return self.clang_version

    def readSingleFile(self, p, deleteEmpty):
        """Load the plist file `p` and record its report and diagnostics.

        A file whose 'files' list is empty is skipped, and is also removed
        from disk when `deleteEmpty` compares equal to True. Any top-level
        key other than 'clang_version', 'files' and 'diagnostics' trips the
        assertion at the end.
        """
        data = _loadPlist(p)

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version, so only the first one seen is kept.
        if 'clang_version' in data:
            version = data.pop('clang_version')
            if self.clang_version is None:
                self.clang_version = version

        # Ignore/delete empty reports.
        if not data['files']:
            # Deliberately an equality test rather than a truthiness test so
            # that only True (or 1) can trigger the deletion of a file.
            if deleteEmpty == True:
                os.remove(p)
            return

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. The first diagnostic
        # decides for the whole file; an empty diagnostics list simply has
        # no HTML reports instead of raising IndexError.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Everything in the plist should have been consumed by now.
        assert not data

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)


# Backward compatibility API.
def loadResults(path, opts, root="", deleteEmpty=True):
    return loadResultsFromSingleRun(SingleRunInfo(path, root, opts.verboseLog),
                                    deleteEmpty)


# Load results of the analyses from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies if the empty plist files should be deleted
def loadResultsFromSingleRun(info, deleteEmpty=True):
    path = info.path
    run = AnalysisRun(info)

    if os.path.isfile(path):
        run.readSingleFile(path, deleteEmpty)
    else:
        for (dirpath, dirnames, filenames) in os.walk(path):
            for f in filenames:
                if not f.endswith('plist'):
                    continue
                run.readSingleFile(os.path.join(dirpath, f), deleteEmpty)

    return run


def cmpAnalysisDiagnostic(d):
    """Sort key for diagnostics: their issue identifier."""
    return d.getIssueIdentifier()


def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).
    """

    res = []

    # Quickly eliminate equal elements: sort both sides by issue identifier
    # and walk them from the largest identifier downwards.
    neqA = []
    neqB = []
    eltsA = sorted(A.diagnostics, key=cmpAnalysisDiagnostic)
    eltsB = sorted(B.diagnostics, key=cmpAnalysisDiagnostic)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        idA = a.getIssueIdentifier()
        idB = b.getIssueIdentifier()
        if idA == idB:
            res.append((a, b, 0))
        elif idA > idB:
            # Nothing left in B can match a; put b back for the next round.
            eltsB.append(b)
            neqA.append(a)
        else:
            # Nothing left in A can match b; put a back for the next round.
            eltsA.append(a)
            neqB.append(b)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res


def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between two runs and return how many there are.

    dirA, dirB  - paths handed to loadResults() for the two runs
    opts        - object providing rootA, rootB and verboseLog
    deleteEmpty - forwarded to loadResults()

    When opts.verboseLog is set, a second, tuple-formatted copy of every
    difference and of the totals is written to that file.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given. Text mode, because only text is written.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    try:
        foundDiffs = 0
        for a, b, confidence in compareResults(resultsA, resultsB):
            if a is None:
                print("ADDED: %r" % (b.getReadableName(),))
                foundDiffs += 1
                _writeLogLine(auxLog,
                              "('ADDED', %r, %r)" % (b.getReadableName(),
                                                     b.getReport()))
            elif b is None:
                print("REMOVED: %r" % (a.getReadableName(),))
                foundDiffs += 1
                _writeLogLine(auxLog,
                              "('REMOVED', %r, %r)" % (a.getReadableName(),
                                                       a.getReport()))
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                _writeLogLine(auxLog,
                              "('CHANGED', %r, %r, %r, %r)"
                              % (a.getReadableName(),
                                 b.getReadableName(),
                                 a.getReport(),
                                 b.getReport()))
            # Otherwise the pair matched exactly (confidence 0): not reported.

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % (TotalReports,))
        print("TOTAL DIFFERENCES: %r" % (foundDiffs,))
        _writeLogLine(auxLog, "('TOTAL NEW REPORTS', %r)" % (TotalReports,))
        _writeLogLine(auxLog, "('TOTAL DIFFERENCES', %r)" % (foundDiffs,))
    finally:
        # Make sure the log is flushed and released even if reporting fails.
        if auxLog is not None:
            auxLog.close()

    return foundDiffs


def main():
    """Command-line entry point: compare the two directories given as args."""
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type=str, default="")
    parser.add_option("", "--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    dumpScanBuildResultsDiff(dirA, dirB, opts)


if __name__ == '__main__':
    main()