#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain run objects whose
    # 'diagnostics' attribute lists the corresponding AnalysisDiagnostic
    # objects.
    #
    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)

"""

import os
import plistlib

import CmpRuns

class SingleRunInfo(object):
    """Information about an analysis run.

    Attributes:
        path: the analysis output directory.
        root: the name of the root directory, which will be disregarded when
            determining the source file name.  Trailing '/' and '\\'
            characters are stripped on construction.
        verboseLog: stored exactly as passed in (None by default).
    """

    def __init__(self, path, root="", verboseLog=None):
        self.path = path
        # Normalise away trailing separators so the root can later be
        # compared against the leading part of a file name.
        self.root = root.rstrip("/\\")
        self.verboseLog = verboseLog

class AnalysisDiagnostic(object):
    """One diagnostic entry of an analyzer report.

    data is the raw diagnostic dictionary; it must have a 'location' entry
    (with 'file', 'line' and 'col'), and the accessors below additionally
    read 'category', 'description' and the optional 'issue_context' /
    'issue_hash_content_of_line_in_context' keys.  report is the owning
    report object: its 'files' table and 'run' (for 'root' and 'path') are
    consulted.  htmlReport is the HTML report file name, or None.
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the source file name, with the run's root prefix removed.

        The root is only removed when it ends on a path boundary, so a root
        of '/src/proj' does not mangle '/src/project2/b.c'.
        """
        root = self._report.run.root
        fileName = self._report.files[self._loc['file']]
        if root and fileName.startswith(root):
            remainder = fileName[len(root):]
            # Strip the root (and the separator following it) only when the
            # match ends exactly at a directory boundary.
            if not remainder or remainder[0] in "/\\":
                return remainder[1:]
        return fileName

    def getLine(self):
        """Return the 'line' field of the diagnostic location."""
        return self._loc['line']

    def getColumn(self):
        """Return the 'col' field of the diagnostic location."""
        return self._loc['col']

    def getCategory(self):
        """Return the 'category' field of the diagnostic."""
        return self._data['category']

    def getDescription(self):
        """Return the 'description' field of the diagnostic."""
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the string used to match this diagnostic across runs.

        It is built from the file name, then the issue context and the
        content hash of the line when those keys are present.
        """
        ident = self.getFileName() + "+"
        if 'issue_context' in self._data:
            ident += self._data['issue_context'] + "+"
        if 'issue_hash_content_of_line_in_context' in self._data:
            ident += str(self._data['issue_hash_content_of_line_in_context'])
        return ident

    def getReport(self):
        """Return the HTML report path, or " " when there is no HTML report."""
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        """Return 'file:line:col, category: description'."""
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        """Return the raw diagnostic dictionary passed to the constructor."""
        return self._data

class multidict(object):
    """Dictionary-like container that keeps every value assigned to a key.

    Assigning to a key appends to that key's list instead of overwriting it;
    reading a key returns the list of values assigned so far.
    """

    def __init__(self, elts=()):
        """Populate from an iterable of (key, value) pairs."""
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Create the list on first use, then append.
        self.data.setdefault(key, []).append(value)

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        # Number of distinct keys, not number of stored values.
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)

class CmpOptions(object):
    """Plain holder for the comparison options.

    rootA / rootB are the root prefixes for the two runs and verboseLog is
    the verbose log setting; all three are stored exactly as given.
    """

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.rootA = rootA
        self.rootB = rootB
        self.verboseLog = verboseLog

class AnalysisReport(object):
    """The contents of one report: its owning run, file table and diagnostics.

    diagnostics starts out empty and is filled in by whoever creates the
    report.
    """

    def __init__(self, run, files):
        self.run = run
        self.files = files
        self.diagnostics = []

class AnalysisRun(object):
    """All reports and diagnostics loaded for one analysis run.

    info supplies 'path' and 'root', which are copied onto the run; the
    info object itself is kept as well.
    """

    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        """Return the clang version seen in the first plist that had one."""
        return self.clang_version

    def readSingleFile(self, p, deleteEmpty):
        """Load the plist file p and add its report to this run.

        A file whose 'files' table is empty contributes nothing; it is
        removed from disk when deleteEmpty is true.
        """
        # plistlib.readPlist was removed in Python 3.9; use plistlib.load
        # where it exists and fall back to readPlist on older interpreters.
        if hasattr(plistlib, 'load'):
            with open(p, 'rb') as plistFile:
                data = plistlib.load(plistFile)
        else:
            data = plistlib.readPlist(p)

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version, so only the first one seen is recorded.
        if 'clang_version' in data:
            version = data.pop('clang_version')
            if self.clang_version is None:
                self.clang_version = version

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            return

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist.  The first diagnostic
        # decides for the whole file; an empty list simply has none.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Every top-level key is expected to have been consumed by now.
        assert not data

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)

# Backward compatibility API.
def loadResults(path, opts, root = "", deleteEmpty=True):
    """Load the results found at path; wrapper kept for older callers.

    Bundles path, root and opts.verboseLog into a SingleRunInfo and forwards
    to loadResultsFromSingleRun, returning whatever it returns.
    """
    info = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(info, deleteEmpty)

# Load results of the analyzes from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies if the empty plist files should be deleted
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """Build an AnalysisRun from info.path and return it.

    info.path may name a single plist file, which is read directly, or a
    directory, which is walked recursively reading every file whose name
    ends in 'plist'.
    """
    path = info.path
    run = AnalysisRun(info)

    if os.path.isfile(path):
        run.readSingleFile(path, deleteEmpty)
        return run

    for dirpath, _, filenames in os.walk(path):
        for f in filenames:
            if f.endswith('plist'):
                run.readSingleFile(os.path.join(dirpath, f), deleteEmpty)

    return run

def cmpAnalysisDiagnostic(d):
    """Sort key for diagnostics: the diagnostic's issue identifier."""
    return d.getIssueIdentifier()

def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).

    Diagnostics are matched on getIssueIdentifier().  Matched pairs come
    first in the result, followed by the unmatched elements of A and then
    the unmatched elements of B.  A.diagnostics and B.diagnostics are not
    modified.
    """

    res = []

    # Compute each identifier exactly once and keep it next to its
    # diagnostic; sorting on the identifier alone keeps the sort stable and
    # never compares the diagnostic objects themselves.
    def sortedByIdentifier(diagnostics):
        keyed = [(d.getIssueIdentifier(), d) for d in diagnostics]
        keyed.sort(key=lambda pair: pair[0])
        return keyed

    # Quickly eliminate equal elements.
    neqA = []
    neqB = []
    eltsA = sortedByIdentifier(A.diagnostics)
    eltsB = sortedByIdentifier(B.diagnostics)
    while eltsA and eltsB:
        # Both lists are ascending, so their tails hold the largest
        # remaining identifiers; consume from the tail.
        keyA, a = eltsA[-1]
        keyB, b = eltsB[-1]
        if keyA == keyB:
            eltsA.pop()
            eltsB.pop()
            res.append((a, b, 0))
        elif keyA > keyB:
            # Nothing left in B can match a.
            eltsA.pop()
            neqA.append(a)
        else:
            # Nothing left in A can match b.
            eltsB.pop()
            neqB.append(b)
    neqA.extend(d for _, d in eltsA)
    neqB.extend(d for _, d in eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res

def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between the results in dirA and dirB.

    Each added, removed or changed diagnostic is printed to standard output,
    followed by the totals.  When opts.verboseLog is set, a tuple-formatted
    line per difference (including the report paths) is also written to that
    file.

    Returns (number of differences, number of diagnostics in A, number of
    diagnostics in B).
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given.  Text mode, because formatted strings
    # are written to it.
    auxLog = open(opts.verboseLog, "w") if opts.verboseLog else None

    def logLine(text):
        # Write one line to the verbose log; no-op without a log.
        if auxLog is not None:
            auxLog.write(text + "\n")

    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                logLine("('ADDED', %r, %r)" % (b.getReadableName(),
                                               b.getReport()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                logLine("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                 a.getReport()))
            elif confidence:
                # A non-zero confidence marks an inexact match.
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                logLine("('CHANGED', %r, %r, %r, %r)"
                        % (a.getReadableName(),
                           b.getReadableName(),
                           a.getReport(),
                           b.getReport()))
            # Otherwise the pair matched exactly: nothing to report.

        # The total is the number of diagnostics in the second run.
        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        logLine("('TOTAL NEW REPORTS', %r)" % TotalReports)
        logLine("('TOTAL DIFFERENCES', %r)" % foundDiffs)
    finally:
        # Always release the log file, even if reporting fails part-way.
        if auxLog is not None:
            auxLog.close()

    return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)

def main():
    """Command-line entry point: compare the results in two directories.

    Expects exactly two positional arguments (dir A and dir B); any other
    count is reported through the option parser's error handling.
    """
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type=str, default="")
    parser.add_option("", "--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    dumpScanBuildResultsDiff(dirA, dirB, opts)

if __name__ == '__main__':
    main()