#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# tsan_analyze.py

''' Given a ThreadSanitizer output file, parses errors and uniques them.'''
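
# Example invocation, for reference only (the log filenames are hypothetical):
#   ./tsan_analyze.py tsan.0.log tsan.1.log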

import gdb_helper

from collections import defaultdict
import hashlib
import logging
import optparse
import os
import re
import subprocess
import sys
import time

import common

# Global symbol table (ugh)
TheAddressTable = None

class _StackTraceLine(object):
  def __init__(self, line, address, binary):
    self.raw_line_ = line
    self.address = address
    self.binary = binary
  def __str__(self):
    global TheAddressTable
    file, line = TheAddressTable.GetFileLine(self.binary, self.address)
    if (file is None) or (line is None):
      return self.raw_line_
    else:
      return self.raw_line_.replace(self.binary, '%s:%s' % (file, line))

class TsanAnalyzer(object):
  ''' Given a set of ThreadSanitizer output files, parse all the errors out of
  them, unique them and output the results.'''
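  # Minimal driving sketch (the file names and test name are hypothetical):
  #   analyzer = TsanAnalyzer(use_gdb=True)
  #   retcode = analyzer.Report(['tsan.0.log', 'tsan.1.log'], 'MyTest.TestCase')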

  LOAD_LIB_RE = re.compile('--[0-9]+-- ([^(:]*) \((0x[0-9a-f]+)\)')
  TSAN_LINE_RE = re.compile('==[0-9]+==\s*[#0-9]+\s*'
                            '([0-9A-Fa-fx]+):'
                            '(?:[^ ]* )*'
                            '([^ :\n]+)'
                            '')
  THREAD_CREATION_STR = ("INFO: T.* "
      "(has been created by T.* at this point|is program's main thread)")

  SANITY_TEST_SUPPRESSION = ("ThreadSanitizer sanity test "
                             "(ToolsSanityTest.DataRace)")
  TSAN_RACE_DESCRIPTION = "Possible data race"
  TSAN_WARNING_DESCRIPTION = ("Unlocking a non-locked lock"
      "|accessing an invalid lock"
      "|which did not acquire this lock")
  RACE_VERIFIER_LINE = "Confirmed a race|unexpected race"
  TSAN_ASSERTION = "Assertion failed: "

  def __init__(self, use_gdb=False):
    '''Sets up the analyzer; the files are read later by GetReports().'''

    self._use_gdb = use_gdb
    self._cur_testcase = None

  def ReadLine(self):
    self.line_ = self.cur_fd_.readline()
    self.stack_trace_line_ = None
    if not self._use_gdb:
      return
    global TheAddressTable
    match = TsanAnalyzer.LOAD_LIB_RE.match(self.line_)
    if match:
      binary, ip = match.groups()
      TheAddressTable.AddBinaryAt(binary, ip)
      return
    match = TsanAnalyzer.TSAN_LINE_RE.match(self.line_)
    if match:
      address, binary_name = match.groups()
      stack_trace_line = _StackTraceLine(self.line_, address, binary_name)
      TheAddressTable.Add(stack_trace_line.binary, stack_trace_line.address)
      self.stack_trace_line_ = stack_trace_line

  def ReadSection(self):
    """ Example of a section:
    ==4528== WARNING: Possible data race: {{{
    ==4528==    T20 (L{}):
    ==4528==     #0  MyTest::Foo1
    ==4528==     #1  MyThread::ThreadBody
    ==4528==   Concurrent write happened at this point:
    ==4528==    T19 (L{}):
    ==4528==     #0  MyTest::Foo2
    ==4528==     #1  MyThread::ThreadBody
    ==4528== }}}
    ------- suppression -------
    {
      <Put your suppression name here>
      ThreadSanitizer:Race
      fun:MyTest::Foo1
      fun:MyThread::ThreadBody
    }
    ------- end suppression -------
    """
    result = [self.line_]
    if re.search("{{{", self.line_):
      while not re.search('}}}', self.line_):
        self.ReadLine()
        if self.stack_trace_line_ is None:
          result.append(self.line_)
        else:
          result.append(self.stack_trace_line_)
      self.ReadLine()
      if re.match('-+ suppression -+', self.line_):
        # We need to calculate the suppression hash and prepend a line like
        # "Suppression (error hash=#0123456789ABCDEF#):" so the buildbot can
        # extract the suppression snippet.
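        # The hash is the first 64 bits of the MD5 of the suppression body,
        # printed as 16 uppercase hex digits.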
        supp = ""
        while not re.match('-+ end suppression -+', self.line_):
          self.ReadLine()
          supp += self.line_
        self.ReadLine()
        if self._cur_testcase:
          result.append("The report came from the `%s` test.\n" % \
                        self._cur_testcase)
        result.append("Suppression (error hash=#%016X#):\n" % \
                      (int(hashlib.md5(supp).hexdigest()[:16], 16)))
        result.append("  For more info on using suppressions see "
            "http://dev.chromium.org/developers/how-tos/using-valgrind/threadsanitizer#TOC-Suppressing-data-races\n")
        result.append(supp)
    else:
      self.ReadLine()

    return result

  def ReadTillTheEnd(self):
    result = [self.line_]
    while self.line_:
      self.ReadLine()
      result.append(self.line_)
    return result

  def ParseReportFile(self, filename):
    '''Parses a report file and returns a list of ThreadSanitizer reports.

    Args:
      filename: report filename.
    Returns:
      A list of reports; each report is a list of lines, which are plain
      strings, or _StackTraceLine objects when self._use_gdb is set.
    '''
    ret = []
    self.cur_fd_ = open(filename, 'r')

    while True:
      # Read ThreadSanitizer reports.
      self.ReadLine()
      if not self.line_:
        break

      while True:
        tmp = []
        while re.search(TsanAnalyzer.RACE_VERIFIER_LINE, self.line_):
          tmp.append(self.line_)
          self.ReadLine()
        while re.search(TsanAnalyzer.THREAD_CREATION_STR, self.line_):
          tmp.extend(self.ReadSection())
        if re.search(TsanAnalyzer.TSAN_RACE_DESCRIPTION, self.line_):
          tmp.extend(self.ReadSection())
          ret.append(tmp)  # includes RaceVerifier and thread creation stacks
        elif (re.search(TsanAnalyzer.TSAN_WARNING_DESCRIPTION, self.line_) and
            not common.IsWindows()): # workaround for http://crbug.com/53198
          tmp.extend(self.ReadSection())
          ret.append(tmp)
        else:
          break

      tmp = []
      if re.search(TsanAnalyzer.TSAN_ASSERTION, self.line_):
        tmp.extend(self.ReadTillTheEnd())
        ret.append(tmp)
        break

      match = re.search("used_suppression:\s+([0-9]+)\s(.*)", self.line_)
      if match:
        count, supp_name = match.groups()
        count = int(count)
        self.used_suppressions[supp_name] += count
    self.cur_fd_.close()
    return ret

  def GetReports(self, files):
    '''Extracts reports from a set of files.

    Reads a set of files and returns a list of all discovered
    ThreadSanitizer race reports. As a side effect, populates
    self.used_suppressions with appropriate info.
    '''

    global TheAddressTable
    if self._use_gdb:
      TheAddressTable = gdb_helper.AddressTable()
    else:
      TheAddressTable = None
    reports = []
    self.used_suppressions = defaultdict(int)
    for file in files:
      reports.extend(self.ParseReportFile(file))
    if self._use_gdb:
      TheAddressTable.ResolveAll()
      # Make each line of each report a string.
      reports = [[str(line) for line in report] for report in reports]
    return [''.join(report_lines) for report_lines in reports]

  def Report(self, files, testcase, check_sanity=False):
    '''Reads in a set of files and prints a ThreadSanitizer report.

    Args:
      files: A list of filenames.
      testcase: Name of the test the reports belong to (may be None).
      check_sanity: if true, fail unless SANITY_TEST_SUPPRESSION was used.
    '''

    # We store the testcase in the _cur_testcase instance variable to avoid
    # passing it through about five functions.
    self._cur_testcase = testcase
    reports = self.GetReports(files)
    self._cur_testcase = None  # just in case, shouldn't be used anymore

    common.PrintUsedSuppressionsList(self.used_suppressions)


    retcode = 0
    if reports:
      sys.stdout.flush()
      sys.stderr.flush()
      logging.info("FAIL! Found %i report(s)" % len(reports))
      for report in reports:
        logging.info('\n' + report)
      sys.stdout.flush()
      retcode = -1

    # Report tool's insanity even if there were errors.
    if (check_sanity and
        TsanAnalyzer.SANITY_TEST_SUPPRESSION not in self.used_suppressions):
      logging.error("FAIL! Sanity check failed!")
      retcode = -3

    if retcode != 0:
      return retcode

    logging.info("PASS: No reports found")
    return 0


def main():
  '''For testing only. The TsanAnalyzer class should be imported instead.'''
  parser = optparse.OptionParser("usage: %prog <files to analyze>")

  (options, args) = parser.parse_args()
  if not args:
    parser.error("no filename specified")
  filenames = args

  logging.getLogger().setLevel(logging.INFO)
  analyzer = TsanAnalyzer(use_gdb=True)
  return analyzer.Report(filenames, None)


if __name__ == '__main__':
  sys.exit(main())