1#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import argparse, os, sys, json, subprocess, pickle, StringIO
7
8parser = argparse.ArgumentParser(
9  description =
10    "Process the Blink points-to graph generated by the Blink GC plugin.")
11
12parser.add_argument(
13  '-', dest='use_stdin', action='store_true',
14  help='Read JSON graph files from stdin')
15
16parser.add_argument(
17  '-c', '--detect-cycles', action='store_true',
18  help='Detect cycles containing GC roots')
19
20parser.add_argument(
21  '-s', '--print-stats', action='store_true',
22  help='Statistics about ref-counted and traced objects')
23
24parser.add_argument(
25  '-v', '--verbose', action='store_true',
26  help='Verbose output')
27
28parser.add_argument(
29  '--ignore-cycles', default=None, metavar='FILE',
30  help='File with cycles to ignore')
31
32parser.add_argument(
33  '--ignore-classes', nargs='*', default=[], metavar='CLASS',
34  help='Classes to ignore when detecting cycles')
35
36parser.add_argument(
37  '--pickle-graph', default=None, metavar='FILE',
38  help='File to read/save the graph from/to')
39
40parser.add_argument(
41  'files', metavar='FILE_OR_DIR', nargs='*', default=[],
42  help='JSON graph files or directories containing them')
43
44# Command line args after parsing.
45args = None
46
47# Map from node labels to nodes.
48graph = {}
49
50# Set of root nodes.
51roots = []
52
53# List of cycles to ignore.
54ignored_cycles = []
55
56# Global flag to determine exit code.
57global_reported_error = False
58
59def set_reported_error(value):
60  global global_reported_error
61  global_reported_error = value
62
63def reported_error():
64  return global_reported_error
65
66def log(msg):
67  if args.verbose:
68    print msg
69
70global_inc_copy = 0
71def inc_copy():
72  global global_inc_copy
73  global_inc_copy += 1
74
75def get_node(name):
76  return graph.setdefault(name, Node(name))
77
78ptr_types = ('raw', 'ref', 'mem')
79
80def inc_ptr(dst, ptr):
81  if ptr in ptr_types:
82    node = graph.get(dst)
83    if not node: return
84    node.counts[ptr] += 1
85
86def add_counts(s1, s2):
87  for (k, v) in s2.iteritems():
88    s1[k] += s2[k]
89
90# Representation of graph nodes. Basically a map of directed edges.
91class Node:
92  def __init__(self, name):
93    self.name = name
94    self.edges = {}
95    self.reset()
96  def __repr__(self):
97    return "%s(%s) %s" % (self.name, self.visited, self.edges)
98  def update_node(self, decl):
99    # Currently we don't track any node info besides its edges.
100    pass
101  def update_edge(self, e):
102    new_edge = Edge(**e)
103    edge = self.edges.get(new_edge.key)
104    if edge:
105      # If an edge exist, its kind is the strongest of the two.
106      edge.kind = max(edge.kind, new_edge.kind)
107    else:
108      self.edges[new_edge.key] = new_edge
109  def super_edges(self):
110    return [ e for e in self.edges.itervalues() if e.is_super() ]
111  def subclass_edges(self):
112    return [ e for e in self.edges.itervalues() if e.is_subclass() ]
113  def reset(self):
114    self.cost = sys.maxint
115    self.visited = False
116    self.path = None
117    self.counts = {}
118    for ptr in ptr_types:
119      self.counts[ptr] = 0
120  def update_counts(self):
121    for e in self.edges.itervalues():
122      inc_ptr(e.dst, e.ptr)
123
124# Representation of directed graph edges.
125class Edge:
126  def __init__(self, **decl):
127    self.src = decl['src']
128    self.dst = decl['dst']
129    self.lbl = decl['lbl']
130    self.ptr = decl['ptr']
131    self.kind = decl['kind'] # 0 = weak, 1 = strong, 2 = root
132    self.loc = decl['loc']
133    # The label does not uniquely determine an edge from a node. We
134    # define the semi-unique key to be the concatenation of the
135    # label and dst name. This is sufficient to track the strongest
136    # edge to a particular type. For example, if the field A::m_f
137    # has type HashMap<WeakMember<B>, Member<B>> we will have a
138    # strong edge with key m_f#B from A to B.
139    self.key = '%s#%s' % (self.lbl, self.dst)
140  def __repr__(self):
141    return '%s (%s) => %s' % (self.src, self.lbl, self.dst)
142  def is_root(self):
143    return self.kind == 2
144  def is_weak(self):
145    return self.kind == 0
146  def keeps_alive(self):
147    return self.kind > 0
148  def is_subclass(self):
149    return self.lbl.startswith('<subclass>')
150  def is_super(self):
151    return self.lbl.startswith('<super>')
152
153def parse_file(filename):
154  obj = json.load(open(filename))
155  return obj
156
157def build_graphs_in_dir(dirname):
158  # TODO: Use plateform independent code, eg, os.walk
159  files = subprocess.check_output(
160    ['find', dirname, '-name', '*.graph.json']).split('\n')
161  log("Found %d files" % len(files))
162  for f in files:
163    f.strip()
164    if len(f) < 1:
165      continue
166    build_graph(f)
167
168def build_graph(filename):
169  for decl in parse_file(filename):
170    if decl.has_key('name'):
171      # Add/update a node entry
172      name = decl['name']
173      node = get_node(name)
174      node.update_node(decl)
175    else:
176      # Add/update an edge entry
177      name = decl['src']
178      node = get_node(name)
179      node.update_edge(decl)
180
181# Copy all non-weak edges from super classes to their subclasses.
182# This causes all fields of a super to be considered fields of a
183# derived class without tranitively relating derived classes with
184# each other. For example, if B <: A, C <: A, and for some D, D => B,
185# we don't want that to entail that D => C.
186def copy_super_edges(edge):
187  if edge.is_weak() or not edge.is_super():
188    return
189  inc_copy()
190  # Make the super-class edge weak (prohibits processing twice).
191  edge.kind = 0
192  # If the super class is not in our graph exit early.
193  super_node = graph.get(edge.dst)
194  if super_node is None: return
195  # Recursively copy all super-class edges.
196  for e in super_node.super_edges():
197    copy_super_edges(e)
198  # Copy strong super-class edges (ignoring sub-class edges) to the sub class.
199  sub_node = graph[edge.src]
200  for e in super_node.edges.itervalues():
201    if e.keeps_alive() and not e.is_subclass():
202      new_edge = Edge(
203        src = sub_node.name,
204        dst = e.dst,
205        lbl = '%s <: %s' % (super_node.name, e.lbl),
206        ptr = e.ptr,
207        kind = e.kind,
208        loc = e.loc,
209      )
210      sub_node.edges[new_edge.key] = new_edge
211  # Add a strong sub-class edge.
212  sub_edge = Edge(
213    src = super_node.name,
214    dst = sub_node.name,
215    lbl = '<subclass>',
216    ptr = edge.ptr,
217    kind = 1,
218    loc = edge.loc,
219  )
220  super_node.edges[sub_edge.key] = sub_edge
221
222def complete_graph():
223  for node in graph.itervalues():
224    for edge in node.super_edges():
225      copy_super_edges(edge)
226    for edge in node.edges.itervalues():
227      if edge.is_root():
228        roots.append(edge)
229  log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy)
230
231def reset_graph():
232  for n in graph.itervalues():
233    n.reset()
234
235def shortest_path(start, end):
236  start.cost = 0
237  minlist = [start]
238  while len(minlist) > 0:
239    minlist.sort(key=lambda n: -n.cost)
240    current = minlist.pop()
241    current.visited = True
242    if current == end or current.cost >= end.cost + 1:
243      return
244    for e in current.edges.itervalues():
245      if not e.keeps_alive():
246        continue
247      dst = graph.get(e.dst)
248      if dst is None or dst.visited:
249        continue
250      if current.cost < dst.cost:
251        dst.cost = current.cost + 1
252        dst.path = e
253      minlist.append(dst)
254
255def detect_cycles():
256  for root_edge in roots:
257    reset_graph()
258    # Mark ignored classes as already visited
259    for ignore in args.ignore_classes:
260      name = ignore.find("::") > 0 and ignore or ("blink::" + ignore)
261      node = graph.get(name)
262      if node:
263        node.visited = True
264    src = graph[root_edge.src]
265    dst = graph.get(root_edge.dst)
266    if src.visited:
267      continue
268    if root_edge.dst == "WTF::String":
269      continue
270    if dst is None:
271      print "\nPersistent root to incomplete destination object:"
272      print root_edge
273      set_reported_error(True)
274      continue
275    # Find the shortest path from the root target (dst) to its host (src)
276    shortest_path(dst, src)
277    if src.cost < sys.maxint:
278      report_cycle(root_edge)
279
280def is_ignored_cycle(cycle):
281  for block in ignored_cycles:
282    if block_match(cycle, block):
283      return True
284
285def block_match(b1, b2):
286  if len(b1) != len(b2):
287    return False
288  for (l1, l2) in zip(b1, b2):
289    if l1 != l2:
290      return False
291  return True
292
293def report_cycle(root_edge):
294  dst = graph[root_edge.dst]
295  path = []
296  edge = root_edge
297  dst.path = None
298  while edge:
299    path.append(edge)
300    edge = graph[edge.src].path
301  path.append(root_edge)
302  path.reverse()
303  # Find the max loc length for pretty printing.
304  max_loc = 0
305  for p in path:
306    if len(p.loc) > max_loc:
307      max_loc = len(p.loc)
308  out = StringIO.StringIO()
309  for p in path[:-1]:
310    print >>out, (p.loc + ':').ljust(max_loc + 1), p
311  sout = out.getvalue()
312  if not is_ignored_cycle(sout):
313    print "\nFound a potentially leaking cycle starting from a GC root:\n", sout
314    set_reported_error(True)
315
316def load_graph():
317  global graph
318  global roots
319  log("Reading graph from pickled file: " + args.pickle_graph)
320  dump = pickle.load(open(args.pickle_graph, 'rb'))
321  graph = dump[0]
322  roots = dump[1]
323
324def save_graph():
325  log("Saving graph to pickle file: " + args.pickle_graph)
326  dump = (graph, roots)
327  pickle.dump(dump, open(args.pickle_graph, 'wb'))
328
329def read_ignored_cycles():
330  global ignored_cycles
331  if not args.ignore_cycles:
332    return
333  log("Reading ignored cycles from file: " + args.ignore_cycles)
334  block = []
335  for l in open(args.ignore_cycles):
336    line = l.strip()
337    if not line or line.startswith('Found'):
338      if len(block) > 0:
339        ignored_cycles.append(block)
340      block = []
341    else:
342      block += l
343  if len(block) > 0:
344    ignored_cycles.append(block)
345
346gc_bases = (
347  'blink::GarbageCollected',
348  'blink::GarbageCollectedFinalized',
349  'blink::GarbageCollectedMixin',
350)
351ref_bases = (
352  'WTF::RefCounted',
353  'WTF::ThreadSafeRefCounted',
354)
355gcref_bases = (
356  'blink::RefCountedGarbageCollected',
357  'blink::ThreadSafeRefCountedGarbageCollected',
358)
359ref_mixins = (
360  'blink::EventTarget',
361  'blink::EventTargetWithInlineData',
362  'blink::ActiveDOMObject',
363)
364
365def print_stats():
366  gcref_managed = []
367  ref_managed = []
368  gc_managed = []
369  hierarchies = []
370
371  for node in graph.itervalues():
372    node.update_counts()
373    for sup in node.super_edges():
374      if sup.dst in gcref_bases:
375        gcref_managed.append(node)
376      elif sup.dst in ref_bases:
377        ref_managed.append(node)
378      elif sup.dst in gc_bases:
379        gc_managed.append(node)
380
381  groups = [("GC manged   ", gc_managed),
382            ("ref counted ", ref_managed),
383            ("in transition", gcref_managed)]
384  total = sum([len(g) for (s,g) in groups])
385  for (s, g) in groups:
386    percent = len(g) * 100 / total
387    print "%2d%% is %s (%d hierarchies)" % (percent, s, len(g))
388
389  for base in gcref_managed:
390    stats = dict({ 'classes': 0, 'ref-mixins': 0 })
391    for ptr in ptr_types: stats[ptr] = 0
392    hierarchy_stats(base, stats)
393    hierarchies.append((base, stats))
394
395  print "\nHierarchies in transition (RefCountedGarbageCollected):"
396  hierarchies.sort(key=lambda (n,s): -s['classes'])
397  for (node, stats) in hierarchies:
398    total = stats['mem'] + stats['ref'] + stats['raw']
399    print (
400      "%s %3d%% of %-30s: %3d cls, %3d mem, %3d ref, %3d raw, %3d ref-mixins" %
401      (stats['ref'] == 0 and stats['ref-mixins'] == 0 and "*" or " ",
402       total == 0 and 100 or stats['mem'] * 100 / total,
403       node.name.replace('blink::', ''),
404       stats['classes'],
405       stats['mem'],
406       stats['ref'],
407       stats['raw'],
408       stats['ref-mixins'],
409     ))
410
411def hierarchy_stats(node, stats):
412  if not node: return
413  stats['classes'] += 1
414  add_counts(stats, node.counts)
415  for edge in node.super_edges():
416    if edge.dst in ref_mixins:
417      stats['ref-mixins'] += 1
418  for edge in node.subclass_edges():
419    hierarchy_stats(graph.get(edge.dst), stats)
420
421def main():
422  global args
423  args = parser.parse_args()
424  if not (args.detect_cycles or args.print_stats):
425    print "Please select an operation to perform (eg, -c to detect cycles)"
426    parser.print_help()
427    return 1
428  if args.pickle_graph and os.path.isfile(args.pickle_graph):
429    load_graph()
430  else:
431    if args.use_stdin:
432      log("Reading files from stdin")
433      for f in sys.stdin:
434        build_graph(f.strip())
435    else:
436      log("Reading files and directories from command line")
437      if len(args.files) == 0:
438        print "Please provide files or directores for building the graph"
439        parser.print_help()
440        return 1
441      for f in args.files:
442        if os.path.isdir(f):
443          log("Building graph from files in directory: " + f)
444          build_graphs_in_dir(f)
445        else:
446          log("Building graph from file: " + f)
447          build_graph(f)
448    log("Completing graph construction (%d graph nodes)" % len(graph))
449    complete_graph()
450    if args.pickle_graph:
451      save_graph()
452  if args.detect_cycles:
453    read_ignored_cycles()
454    log("Detecting cycles containg GC roots")
455    detect_cycles()
456  if args.print_stats:
457    log("Printing statistics")
458    print_stats()
459  if reported_error():
460    return 1
461  return 0
462
463if __name__ == '__main__':
464  sys.exit(main())
465