1#!/usr/bin/env python
2# Copyright (c) 2011 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6# memcheck_analyze.py
7
8''' Given a valgrind XML file, parses errors and uniques them.'''
9
10import gdb_helper
11
12from collections import defaultdict
13import hashlib
14import logging
15import optparse
16import os
17import re
18import subprocess
19import sys
20import time
21from xml.dom.minidom import parse
22from xml.parsers.expat import ExpatError
23
24import common
25
# Global symbol table (yuck). Set by MemcheckAnalyzer.Report() when gdb
# resolution is requested; consulted by gatherFrames() and
# ValgrindError.__str__() to resolve file:line info missing from the XML.
TheAddressTable = None

# These are regexps that define functions (using C++ mangled names)
# we don't want to see in stack traces while pretty printing
# or generating suppressions.
# Just stop printing the stack/suppression frames when the current one
# matches any of these.
_BORING_CALLERS = common.BoringCallers(mangled=True, use_re_wildcards=True)
35
def getTextOf(top_node, name):
  ''' Returns all text in all DOM nodes with a certain |name| that are children
  of |top_node|. Returns "" when no such nodes (or no text) exist.
  '''

  pieces = []
  for element in top_node.getElementsByTagName(name):
    for child in element.childNodes:
      if child.nodeType == child.TEXT_NODE:
        pieces.append(child.data)
  return "".join(pieces)
46
def getCDATAOf(top_node, name):
  ''' Returns all CDATA in all DOM nodes with a certain |name| that are children
  of |top_node|, or None if there was no CDATA at all.
  '''

  chunks = []
  for element in top_node.getElementsByTagName(name):
    chunks.extend(child.data for child in element.childNodes
                  if child.nodeType == child.CDATA_SECTION_NODE)
  combined = "".join(chunks)
  if not combined:
    return None
  return combined
59
def shortenFilePath(source_dir, directory):
  '''Returns |directory| with the string prefix |source_dir| (and a few
  well-known build-tree prefixes) stripped off, using the last occurrence
  of each prefix.'''
  prefixes = ["build/src/", "valgrind/coregrind/", "out/Release/../../"]

  if source_dir:
    prefixes.append(source_dir)

  for prefix in prefixes:
    pos = directory.rfind(prefix)
    if pos >= 0:
      directory = directory[pos + len(prefix):]

  return directory
74
# Constants that give real names to the abbreviations in valgrind XML output.
# These also serve as the keys of the per-frame dicts built by gatherFrames().
INSTRUCTION_POINTER = "ip"
OBJECT_FILE = "obj"
FUNCTION_NAME = "fn"
SRC_FILE_DIR = "dir"
SRC_FILE_NAME = "file"
SRC_LINE = "line"
82
def gatherFrames(node, source_dir):
  '''Extracts the <frame> children of |node| into a list of dicts.

  Each dict maps the abbreviation constants above to the frame's text.
  Gathering stops at (and excludes) the first "boring" function; frames
  lacking line info are also queued on TheAddressTable (when one is
  configured) for later gdb-based resolution.
  '''
  global TheAddressTable
  frames = []
  for frame_element in node.getElementsByTagName("frame"):
    frame_dict = {
      INSTRUCTION_POINTER : getTextOf(frame_element, INSTRUCTION_POINTER),
      OBJECT_FILE         : getTextOf(frame_element, OBJECT_FILE),
      FUNCTION_NAME       : getTextOf(frame_element, FUNCTION_NAME),
      SRC_FILE_DIR        : shortenFilePath(
          source_dir, getTextOf(frame_element, SRC_FILE_DIR)),
      SRC_FILE_NAME       : getTextOf(frame_element, SRC_FILE_NAME),
      SRC_LINE            : getTextOf(frame_element, SRC_LINE)
    }

    # Ignore this frame and all the following if it's a "boring" function.
    if any(re.match("^%s$" % boring, frame_dict[FUNCTION_NAME])
           for boring in _BORING_CALLERS):
      break

    frames.append(frame_dict)

    if TheAddressTable != None and frame_dict[SRC_LINE] == "":
      # No debug info in the XML for this frame; try using gdb later.
      TheAddressTable.Add(frame_dict[OBJECT_FILE],
                          frame_dict[INSTRUCTION_POINTER])
  return frames
113
class ValgrindError:
  ''' Takes a <DOM Element: error> node and reads all the data from it. A
  ValgrindError is immutable and is hashed on its pretty printed output.
  '''

  def __init__(self, source_dir, error_node, commandline, testcase):
    ''' Copies all the relevant information out of the DOM and into object
    properties.

    Args:
      error_node: The <error></error> DOM node we're extracting from.
      source_dir: Prefix that should be stripped from the <dir> node.
      commandline: The command that was run under valgrind
      testcase: The test case name, if known.
    '''

    # Valgrind errors contain one <what><stack> pair, plus an optional
    # <auxwhat><stack> pair, plus an optional <origin><what><stack></origin>,
    # plus (since 3.5.0) a <suppression></suppression> pair.
    # (Origin is nicely enclosed; too bad the other two aren't.)
    # The most common way to see all three in one report is
    # a syscall with a parameter that points to uninitialized memory, e.g.
    # Format:
    # <error>
    #   <unique>0x6d</unique>
    #   <tid>1</tid>
    #   <kind>SyscallParam</kind>
    #   <what>Syscall param write(buf) points to uninitialised byte(s)</what>
    #   <stack>
    #     <frame>
    #     ...
    #     </frame>
    #   </stack>
    #   <auxwhat>Address 0x5c9af4f is 7 bytes inside a block of ...</auxwhat>
    #   <stack>
    #     <frame>
    #     ...
    #     </frame>
    #   </stack>
    #   <origin>
    #   <what>Uninitialised value was created by a heap allocation</what>
    #   <stack>
    #     <frame>
    #     ...
    #     </frame>
    #   </stack>
    #   </origin>
    #   <suppression>
    #     <sname>insert_a_suppression_name_here</sname>
    #     <skind>Memcheck:Param</skind>
    #     <skaux>write(buf)</skaux>
    #     <sframe> <fun>__write_nocancel</fun> </sframe>
    #     ...
    #     <sframe> <fun>main</fun> </sframe>
    #     <rawtext>
    # <![CDATA[
    # {
    #    <insert_a_suppression_name_here>
    #    Memcheck:Param
    #    write(buf)
    #    fun:__write_nocancel
    #    ...
    #    fun:main
    # }
    # ]]>
    #     </rawtext>
    #   </suppression>
    # </error>
    #
    # Each frame looks like this:
    #  <frame>
    #    <ip>0x83751BC</ip>
    #    <obj>/data/dkegel/chrome-build/src/out/Release/base_unittests</obj>
    #    <fn>_ZN7testing8internal12TestInfoImpl7RunTestEPNS_8TestInfoE</fn>
    #    <dir>/data/dkegel/chrome-build/src/testing/gtest/src</dir>
    #    <file>gtest-internal-inl.h</file>
    #    <line>655</line>
    #  </frame>
    # although the dir, file, and line elements are missing if there is
    # no debug info.

    self._kind = getTextOf(error_node, "kind")
    self._backtraces = []       # list of [description, frames] pairs
    self._suppression = None    # raw suppression text from <rawtext> CDATA
    self._commandline = commandline
    self._testcase = testcase
    self._additional = []       # descriptions that arrived with no stack

    # Iterate through the nodes, parsing <what|auxwhat><stack> pairs.
    # A description is held until the <stack> that follows it is seen.
    description = None
    for node in error_node.childNodes:
      if node.localName == "what" or node.localName == "auxwhat":
        description = "".join([n.data for n in node.childNodes
                              if n.nodeType == n.TEXT_NODE])
      elif node.localName == "xwhat":
        description = getTextOf(node, "text")
      elif node.localName == "stack":
        assert description
        self._backtraces.append([description, gatherFrames(node, source_dir)])
        description = None
      elif node.localName == "origin":
        description = getTextOf(node, "what")
        stack = node.getElementsByTagName("stack")[0]
        frames = gatherFrames(stack, source_dir)
        self._backtraces.append([description, frames])
        description = None
        stack = None
        frames = None
      elif description and node.localName != None:
        # The latest description has no stack, e.g. "Address 0x28 is unknown"
        self._additional.append(description)
        description = None

      # <suppression> is a sibling of the pairs above, not part of one.
      if node.localName == "suppression":
        self._suppression = getCDATAOf(node, "rawtext");

  def __str__(self):
    ''' Pretty print the type and backtrace(s) of this specific error,
        including suppression (which is just a mangled backtrace).'''
    output = ""
    if (self._commandline):
      output += self._commandline + "\n"

    output += self._kind + "\n"
    for backtrace in self._backtraces:
      output += backtrace[0] + "\n"
      # Demangle every function name in this backtrace with one c++filt
      # invocation (one mangled name per input line).
      filter = subprocess.Popen("c++filt -n", stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                shell=True,
                                close_fds=True)
      buf = ""
      for frame in backtrace[1]:
        buf +=  (frame[FUNCTION_NAME] or frame[INSTRUCTION_POINTER]) + "\n"
      # NOTE(review): assumes Python 2, where communicate() yields str; on
      # Python 3 stdoutbuf would be bytes and the split below would fail.
      (stdoutbuf, stderrbuf) = filter.communicate(buf.encode('latin-1'))
      demangled_names = stdoutbuf.split("\n")

      i = 0
      for frame in backtrace[1]:
        output += ("  " + demangled_names[i])
        i = i + 1

        global TheAddressTable
        if TheAddressTable != None and frame[SRC_FILE_DIR] == "":
           # Try using gdb
           foo = TheAddressTable.GetFileLine(frame[OBJECT_FILE],
                                             frame[INSTRUCTION_POINTER])
           if foo[0] != None:
             output += (" (" + foo[0] + ":" + foo[1] + ")")
        elif frame[SRC_FILE_DIR] != "":
          output += (" (" + frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME] +
                     ":" + frame[SRC_LINE] + ")")
        else:
          output += " (" + frame[OBJECT_FILE] + ")"
        output += "\n"

    for additional in self._additional:
      output += additional + "\n"

    assert self._suppression != None, "Your Valgrind doesn't generate " \
                                      "suppressions - is it too old?"

    if self._testcase:
      output += "The report came from the `%s` test.\n" % self._testcase
    output += "Suppression (error hash=#%016X#):\n" % self.ErrorHash()
    output += ("  For more info on using suppressions see "
               "http://dev.chromium.org/developers/tree-sheriffs/sheriff-details-chromium/memory-sheriff#TOC-Suppressing-memory-reports")

    # Widen suppression slightly to make portable between mac and linux
    # TODO(timurrrr): Oops, these transformations should happen
    # BEFORE calculating the hash!
    supp = self._suppression;
    supp = supp.replace("fun:_Znwj", "fun:_Znw*")
    supp = supp.replace("fun:_Znwm", "fun:_Znw*")
    supp = supp.replace("fun:_Znaj", "fun:_Zna*")
    supp = supp.replace("fun:_Znam", "fun:_Zna*")

    # Make suppressions even less platform-dependent.
    for sz in [1, 2, 4, 8]:
      supp = supp.replace("Memcheck:Addr%d" % sz, "Memcheck:Unaddressable")
      supp = supp.replace("Memcheck:Value%d" % sz, "Memcheck:Uninitialized")
    supp = supp.replace("Memcheck:Cond", "Memcheck:Uninitialized")

    # Split into lines so we can enforce length limits
    supplines = supp.split("\n")
    supp = None  # to avoid re-use

    # Truncate at line 26 (VG_MAX_SUPP_CALLERS plus 2 for name and type)
    # or at the first 'boring' caller.
    # (https://bugs.kde.org/show_bug.cgi?id=199468 proposes raising
    # VG_MAX_SUPP_CALLERS, but we're probably fine with it as is.)
    newlen = min(26, len(supplines));

    # Drop boring frames and all the following.
    enough_frames = False
    for frameno in range(newlen):
      for boring_caller in _BORING_CALLERS:
        if re.match("^ +fun:%s$" % boring_caller, supplines[frameno]):
          newlen = frameno
          enough_frames = True
          break
      if enough_frames:
        break
    if (len(supplines) > newlen):
      supplines = supplines[0:newlen]
      supplines.append("}")

    for frame in range(len(supplines)):
      # Replace the always-changing anonymous namespace prefix with "*".
      m = re.match("( +fun:)_ZN.*_GLOBAL__N_.*\.cc_" +
                   "[0-9a-fA-F]{8}_[0-9a-fA-F]{8}(.*)",
                   supplines[frame])
      if m:
        supplines[frame] = "*".join(m.groups())

    output += "\n".join(supplines) + "\n"

    return output

  def UniqueString(self):
    ''' String to use for object identity. Don't print this, use str(obj)
    instead.'''
    rep = self._kind + " "
    for backtrace in self._backtraces:
      for frame in backtrace[1]:
        rep += frame[FUNCTION_NAME]

        if frame[SRC_FILE_DIR] != "":
          rep += frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME]
        else:
          rep += frame[OBJECT_FILE]

    return rep

  # This is a device-independent hash identifying the suppression.
  # By printing out this hash we can find duplicate reports between tests and
  # different shards running on multiple buildbots
  # NOTE(review): hashes a str directly; under Python 3 hashlib.md5 would
  # require bytes here.
  def ErrorHash(self):
    return int(hashlib.md5(self.UniqueString()).hexdigest()[:16], 16)

  def __hash__(self):
    return hash(self.UniqueString())
  def __eq__(self, rhs):
    # |rhs| may be another ValgrindError or a plain string (Report() mixes
    # both in its error sets). Comparing our UniqueString() against another
    # ValgrindError falls back to the reflected __eq__, which ends up
    # comparing the two UniqueString() values.
    return self.UniqueString() == rhs
358
359def log_is_finished(f, force_finish):
360  f.seek(0)
361  prev_line = ""
362  while True:
363    line = f.readline()
364    if line == "":
365      if not force_finish:
366        return False
367      # Okay, the log is not finished but we can make it up to be parseable:
368      if prev_line.strip() in ["</error>", "</errorcounts>", "</status>"]:
369        f.write("</valgrindoutput>\n")
370        return True
371      return False
372    if '</valgrindoutput>' in line:
373      # Valgrind often has garbage after </valgrindoutput> upon crash.
374      f.truncate()
375      return True
376    prev_line = line
377
class MemcheckAnalyzer:
  ''' Given a set of Valgrind XML files, parse all the errors out of them,
  unique them and output the results.'''

  # Suppressions the memcheck sanity tests are expected to hit, with the
  # minimum number of times each must fire for the sanity check to pass.
  # Treated as read-only; Report() works on a copy.
  SANITY_TEST_SUPPRESSIONS = {
      "Memcheck sanity test 01 (memory leak).": 1,
      "Memcheck sanity test 02 (malloc/read left).": 1,
      "Memcheck sanity test 03 (malloc/read right).": 1,
      "Memcheck sanity test 04 (malloc/write left).": 1,
      "Memcheck sanity test 05 (malloc/write right).": 1,
      "Memcheck sanity test 06 (new/read left).": 1,
      "Memcheck sanity test 07 (new/read right).": 1,
      "Memcheck sanity test 08 (new/write left).": 1,
      "Memcheck sanity test 09 (new/write right).": 1,
      "Memcheck sanity test 10 (write after free).": 1,
      "Memcheck sanity test 11 (write after delete).": 1,
      "Memcheck sanity test 12 (array deleted without []).": 1,
      "Memcheck sanity test 13 (single element deleted with []).": 1,
      "Memcheck sanity test 14 (malloc/read uninit).": 1,
      "Memcheck sanity test 15 (new/read uninit).": 1,
  }

  # Max time (seconds) to wait for memcheck logs to complete.
  LOG_COMPLETION_TIMEOUT = 180.0

  def __init__(self, source_dir, show_all_leaks=False, use_gdb=False):
    '''Create a parser for Memcheck logs.

    Args:
      source_dir: Path to top of source tree for this build
      show_all_leaks: Whether to show even less important leaks
      use_gdb: Whether to use gdb to resolve source filenames and line numbers
               in the report stacktraces
    '''
    self._source_dir = source_dir
    self._show_all_leaks = show_all_leaks
    self._use_gdb = use_gdb

    # Contains the set of unique errors seen across all Report() calls.
    self._errors = set()

    # Contains the time when we started analyzing the first log file.
    # This variable is used to skip incomplete logs after some timeout.
    self._analyze_start_time = None


  def Report(self, files, testcase, check_sanity=False):
    '''Reads in a set of files and prints Memcheck report.

    Args:
      files: A list of filenames.
      testcase: Name of the test the logs came from, or None if unknown.
      check_sanity: if true, search for SANITY_TEST_SUPPRESSIONS

    Returns:
      0 if no problems were found, -1 if errors were reported, -2 if a log
      could not be parsed, -3 if the sanity check failed.
    '''
    # Beyond the detailed errors parsed by ValgrindError above,
    # the xml file contain records describing suppressions that were used:
    # <suppcounts>
    #  <pair>
    #    <count>28</count>
    #    <name>pango_font_leak_todo</name>
    #  </pair>
    #  <pair>
    #    <count>378</count>
    #    <name>bug_13243</name>
    #  </pair>
    # </suppcounts
    # Collect these and print them at the end.
    #
    # With our patch for https://bugs.kde.org/show_bug.cgi?id=205000 in,
    # the file also includes records of the form
    # <load_obj><obj>/usr/lib/libgcc_s.1.dylib</obj><ip>0x27000</ip></load_obj>
    # giving the filename and load address of each binary that was mapped
    # into the process.

    global TheAddressTable
    if self._use_gdb:
      TheAddressTable = gdb_helper.AddressTable()
    else:
      TheAddressTable = None
    cur_report_errors = set()
    suppcounts = defaultdict(int)
    badfiles = set()

    if self._analyze_start_time is None:
      self._analyze_start_time = time.time()
    start_time = self._analyze_start_time

    parse_failed = False
    for log_filename in files:
      # Wait up to three minutes for valgrind to finish writing all files,
      # but after that, just skip incomplete files and warn.
      f = open(log_filename, "r+")
      # Valgrind log names end in ".<pid>"; extract the pid so we can check
      # whether the process is still alive while waiting for the log.
      pid = re.match(".*\.([0-9]+)$", log_filename)
      if pid:
        pid = pid.groups()[0]
      found = False
      running = True
      firstrun = True
      skip = False
      origsize = os.path.getsize(log_filename)
      while (running and not found and not skip and
             (firstrun or
              ((time.time() - start_time) < self.LOG_COMPLETION_TIMEOUT))):
        firstrun = False
        f.seek(0)
        if pid:
          # Make sure the process is still running so we don't wait for
          # 3 minutes if it was killed. See http://crbug.com/17453
          ps_out = subprocess.Popen("ps p %s" % pid, shell=True,
                                    stdout=subprocess.PIPE).stdout
          if len(ps_out.readlines()) < 2:
            running = False
        else:
          skip = True
          running = False
        found = log_is_finished(f, False)
        if not running and not found:
          logging.warn("Valgrind process PID = %s is not running but its "
                       "XML log has not been finished correctly.\n"
                       "Make it up by adding some closing tags manually." % pid)
          found = log_is_finished(f, not running)
        if running and not found:
          time.sleep(1)
      f.close()
      if not found:
        badfiles.add(log_filename)
      else:
        newsize = os.path.getsize(log_filename)
        if origsize > newsize+1:
          logging.warn(str(origsize - newsize) +
                       " bytes of junk were after </valgrindoutput> in %s!" %
                       log_filename)
        try:
          parsed_file = parse(log_filename)
        except ExpatError as e:
          parse_failed = True
          logging.warn("could not parse %s: %s" % (log_filename, e))
          # Log a few lines of context around the parse error to help
          # diagnose what went wrong with the XML.
          lineno = e.lineno - 1
          context_lines = 5
          context_start = max(0, lineno - context_lines)
          context_end = lineno + context_lines + 1
          context_file = open(log_filename, "r")
          for i in range(0, context_start):
            context_file.readline()
          for i in range(context_start, context_end):
            context_data = context_file.readline().rstrip()
            if i != lineno:
              logging.warn("  %s" % context_data)
            else:
              logging.warn("> %s" % context_data)
          context_file.close()
          continue
        if TheAddressTable != None:
          # Record the load address of every mapped binary so gdb can
          # resolve instruction pointers later.
          load_objs = parsed_file.getElementsByTagName("load_obj")
          for load_obj in load_objs:
            obj = getTextOf(load_obj, "obj")
            ip = getTextOf(load_obj, "ip")
            TheAddressTable.AddBinaryAt(obj, ip)

        # The command under test is echoed in the preamble's "Command" line.
        commandline = None
        preamble = parsed_file.getElementsByTagName("preamble")[0]
        for node in preamble.getElementsByTagName("line"):
          if node.localName == "line":
            for x in node.childNodes:
              if x.nodeType == node.TEXT_NODE and "Command" in x.data:
                commandline = x.data
                break

        raw_errors = parsed_file.getElementsByTagName("error")
        for raw_error in raw_errors:
          # Ignore "possible" leaks for now by default.
          if (self._show_all_leaks or
              getTextOf(raw_error, "kind") != "Leak_PossiblyLost"):
            error = ValgrindError(self._source_dir,
                                  raw_error, commandline, testcase)
            if error not in cur_report_errors:
              # We haven't seen such errors doing this report yet...
              if error in self._errors:
                # ... but we saw it in earlier reports, e.g. previous UI test
                cur_report_errors.add("This error was already printed in "
                                      "some other test, see 'hash=#%016X#'" % \
                                      error.ErrorHash())
              else:
                # ... and we haven't seen it in other tests as well
                self._errors.add(error)
                cur_report_errors.add(error)

        suppcountlist = parsed_file.getElementsByTagName("suppcounts")
        if len(suppcountlist) > 0:
          suppcountlist = suppcountlist[0]
          for node in suppcountlist.getElementsByTagName("pair"):
            count = getTextOf(node, "count")
            name = getTextOf(node, "name")
            suppcounts[name] += int(count)

    if len(badfiles) > 0:
      logging.warn("valgrind didn't finish writing %d files?!" % len(badfiles))
      for log_filename in badfiles:
        logging.warn("Last 20 lines of %s :" % log_filename)
        os.system("tail -n 20 '%s' 1>&2" % log_filename)

    if parse_failed:
      logging.error("FAIL! Couldn't parse Valgrind output file")
      return -2

    common.PrintUsedSuppressionsList(suppcounts)

    retcode = 0
    if cur_report_errors:
      logging.error("FAIL! There were %s errors: " % len(cur_report_errors))

      if TheAddressTable != None:
        TheAddressTable.ResolveAll()

      for error in cur_report_errors:
        logging.error(error)

      retcode = -1

    # Report tool's insanity even if there were errors.
    if check_sanity:
      # Work on a copy: the previous code aliased the class attribute and
      # deleted entries from it, corrupting SANITY_TEST_SUPPRESSIONS for
      # every later Report() call in the same process.
      remaining_sanity_supp = dict(MemcheckAnalyzer.SANITY_TEST_SUPPRESSIONS)
      for (name, count) in suppcounts.items():
        # Workaround for http://crbug.com/334074
        if (name in remaining_sanity_supp and
            remaining_sanity_supp[name] <= count):
          del remaining_sanity_supp[name]
      if remaining_sanity_supp:
        logging.error("FAIL! Sanity check failed!")
        logging.info("The following test errors were not handled: ")
        for (name, count) in remaining_sanity_supp.items():
          logging.info("  * %dx %s" % (count, name))
        retcode = -3

    if retcode != 0:
      return retcode

    logging.info("PASS! No errors found!")
    return 0
616
617
def _main():
  '''For testing only. The MemcheckAnalyzer class should be imported instead.'''
  parser = optparse.OptionParser("usage: %prog [options] <files to analyze>")
  parser.add_option("", "--source-dir",
                    help="path to top of source tree for this build"
                    "(used to normalize source paths in baseline)")

  options, args = parser.parse_args()
  if not args:
    parser.error("no filename specified")

  # Analyze all the files named on the command line as a single report.
  analyzer = MemcheckAnalyzer(options.source_dir, use_gdb=True)
  return analyzer.Report(args, None)
632
633
# Allow running this module directly for ad-hoc analysis of saved log files.
if __name__ == "__main__":
  sys.exit(_main())
636