1#!/usr/bin/env python
2#
3# Copyright 2010 the V8 project authors. All rights reserved.
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9#       notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11#       copyright notice, this list of conditions and the following
12#       disclaimer in the documentation and/or other materials provided
13#       with the distribution.
14#     * Neither the name of Google Inc. nor the names of its
15#       contributors may be used to endorse or promote products derived
16#       from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29#
30
31#
# This is a utility for plotting charts based on GC traces produced by V8 when
33# run with flags --trace-gc --trace-gc-nvp. Relies on gnuplot for actual
34# plotting.
35#
36# Usage: gc-nvp-trace-processor.py <GC-trace-filename>
37#
38
39
40from __future__ import with_statement
41import sys, types, re, subprocess, math
42
def flatten(l):
  """Concatenate the elements of every iterable in `l` into one new list.

  Only one level of nesting is removed; the order of elements is preserved.
  """
  return [element for chunk in l for element in chunk]
47
def split_nvp(s):
  """Parse every `name=value` pair found in `s` into a dictionary.

  Values that int() accepts are stored as integers, all others are kept as
  strings. When a name occurs more than once the last occurrence wins.
  """
  def convert(text):
    # Keep the raw text for anything that is not an integer literal.
    try:
      return int(text)
    except ValueError:
      return text

  pairs = re.findall(r"(\w+)=([-\w]+)", s)
  return dict((key, convert(raw)) for (key, raw) in pairs)
57
def parse_gc_trace(input):
  """Read the file `input` and return its GC records as a list of dicts.

  Each line is parsed with split_nvp(). Only lines that carry a 'pause'
  value greater than zero are kept; every kept record additionally gets an
  'i' entry holding its position in the returned list.
  """
  records = []
  with open(input) as trace_file:
    for raw_line in trace_file:
      record = split_nvp(raw_line)
      if not record or 'pause' not in record:
        continue
      if not (record['pause'] > 0):
        continue
      record['i'] = len(records)
      records.append(record)
  return records
67
def extract_field_names(script):
  """Map every `$name` reference found in `script` to a column number.

  Returns a dictionary that always contains the pre-seeded entries 'data' and
  'in' (both mapped to True, so `$data` and `$in` are never numbered) plus
  one entry per distinct `$name`, numbered in order of first appearance.
  """
  fields = { 'data': True, 'in': True }
  # NOTE(review): the original never initialised the counter; numbering starts
  # at 1 here to mirror the 1-based indices produced by collect_fields() --
  # confirm this is the intended base.
  field_count = 1

  # The dollar sign must be escaped: unescaped it anchors at end of input and
  # the pattern can never match a name.
  for m in re.finditer(r"\$(\w+)", script):
    field_name = m.group(1)
    if field_name not in fields:
      fields[field_name] = field_count
      field_count = field_count + 1

  return fields
78
def gnuplot(script):
  """Run `gnuplot`, feed it `script` on standard input and wait for it."""
  process = subprocess.Popen(["gnuplot"], stdin=subprocess.PIPE)
  pipe = process.stdin
  pipe.write(script)
  # Closing the pipe signals end of input to the child process.
  pipe.close()
  process.wait()
84
# Axis names that can be passed as the `axis` argument of an Item; the value
# is emitted verbatim after the `axis` keyword of the generated plot command.
(x1y1, x1y2, x2y1, x2y2) = ('x1y1', 'x1y2', 'x2y1', 'x2y2')
89
class Item(object):
  """One data series of a `plot` command.

  title:    legend text of the series.
  field:    a single field reference or a list of field references.
  axis:     axis name emitted after the `axis` keyword (defaults to x1y1).
  keywords: optional styling; 'style', 'lc' and 'fs' are understood by
            to_gnuplot(), anything else is stored but not rendered.
  """
  def __init__(self, title, field, axis = x1y1, **keywords):
    self.title = title
    self.axis = axis
    self.props = keywords
    # isinstance() instead of an exact type() match against the Python-2-only
    # types.ListType: list subclasses are accepted as lists as well.
    if isinstance(field, list):
      self.field = field
    else:
      self.field = [field]

  def fieldrefs(self):
    """Return the list of field references this series reads."""
    return self.field

  def to_gnuplot(self, context):
    """Render the series as one comma-free fragment of a `plot` command.

    `context` supplies the data file name (`context.datafile`) and turns the
    field references into a `using` specification via format_fieldref().
    """
    args = ['"%s"' % context.datafile,
            'using %s' % context.format_fieldref(self.field),
            'title "%s"' % self.title,
            'axis %s' % self.axis]
    # Optional styling clauses, emitted in this fixed order when present.
    optional_clauses = (('style', 'with %s'),
                        ('lc', 'lc rgb "%s"'),
                        ('fs', 'fs %s'))
    for (key, template) in optional_clauses:
      if key in self.props:
        args.append(template % self.props[key])
    return ' '.join(args)
115
class Plot(object):
  """A `plot` command made of the items passed to the constructor."""
  def __init__(self, *items):
    # Kept as the tuple of positional arguments, in call order.
    self.items = items

  def fieldrefs(self):
    """Return the field references of all items as one flat list."""
    per_item = [entry.fieldrefs() for entry in self.items]
    return flatten(per_item)

  def to_gnuplot(self, ctx):
    """Render 'plot ' followed by the comma-separated item fragments."""
    rendered = []
    for entry in self.items:
      rendered.append(entry.to_gnuplot(ctx))
    return 'plot ' + ', '.join(rendered)
125
class Set(object):
  """A `set` command; `value` is the text that follows the keyword."""
  def __init__(self, value):
    self.value = value

  def fieldrefs(self):
    """A `set` command reads no trace fields, so the list is always empty."""
    return []

  def to_gnuplot(self, ctx):
    """Render the command; `ctx` is accepted but not used."""
    return ''.join(('set ', self.value))
135
class Context(object):
  """Rendering context: the data file name and the field-to-column mapping."""
  def __init__(self, datafile, field_to_index):
    self.field_to_index = field_to_index
    self.datafile = datafile

  def format_fieldref(self, fieldref):
    """Return the column indices of `fieldref` joined with ':'.

    Raises KeyError for a field that is missing from `field_to_index`.
    """
    columns = []
    for name in fieldref:
      columns.append(str(self.field_to_index[name]))
    return ':'.join(columns)
143
def collect_fields(plot):
  """Collect the distinct field references used by the entries of `plot`.

  Returns a pair (fields, field_to_index): `fields` lists every distinct
  reference in order of first appearance and `field_to_index` maps each of
  them to its 1-based position in that list.
  """
  ordered = []
  positions = {}

  for ref in flatten([entry.fieldrefs() for entry in plot]):
    if ref in positions:
      continue
    ordered.append(ref)
    # Taken after the append, so the first field gets index 1.
    positions[ref] = len(ordered)

  return (ordered, positions)
157
def is_y2_used(plot):
  """Tell whether any Plot entry of `plot` has an item on x1y2 or x2y2.

  Entries that are not Plot instances are ignored.
  """
  return any(item.axis == x1y2 or item.axis == x2y2
             for subplot in plot if isinstance(subplot, Plot)
             for item in subplot.items)
165
def get_field(trace_line, field):
  """Resolve `field` against one trace record.

  A callable field is invoked with the record and its result is returned;
  any other field is used as a key into the record, so a missing key raises
  KeyError.

  Dispatching on callable() rather than on the exact Python-2-only
  types.StringType / types.FunctionType means that unicode keys, builtins and
  other callables are resolved too instead of silently yielding None.
  """
  if callable(field):
    return field(trace_line)
  return trace_line[field]
172
def generate_datafile(datafile_name, trace, fields):
  """Write `trace` to the file `datafile_name`, one record per line.

  Each line holds the str() of every field in `fields`, resolved with
  get_field() and separated by tab characters.
  """
  with open(datafile_name, 'w') as sink:
    for record in trace:
      cells = []
      for field in fields:
        cells.append(str(get_field(record, field)))
      sink.write('\t'.join(cells) + '\n')
179
def generate_script_and_datafile(plot, trace, datafile, output):
  """Write the data file for `plot` and return the matching gnuplot script.

  plot:     sequence of entries providing fieldrefs() and to_gnuplot().
  trace:    records to dump into the data file.
  datafile: name of the data file to (over)write.
  output:   name of the PNG file the script tells gnuplot to produce.
  """
  (fields, field_to_index) = collect_fields(plot)
  generate_datafile(datafile, trace, fields)

  lines = ['set terminal png',
           'set output "%s"' % output,
           'set autoscale',
           'set ytics nomirror',
           'set xtics nomirror',
           'set key below']

  # The secondary y axis is configured only when some item is plotted on it.
  if is_y2_used(plot):
    lines.extend(['set autoscale y2', 'set y2tics'])

  ctx = Context(datafile, field_to_index)
  lines.extend([entry.to_gnuplot(ctx) for entry in plot])

  return '\n'.join(lines)
202
def plot_all(plots, trace, prefix):
  """Render every entry of `plots` and return the list of image file names.

  Chart number N is written to '<prefix>_N.png'. All charts share the
  intermediate data file '~datafile', which is rewritten for each of them.
  """
  charts = []

  for (index, plot) in enumerate(plots):
    outfilename = "%s_%d.png" % (prefix, index)
    charts.append(outfilename)
    script = generate_script_and_datafile(plot, trace, '~datafile', outfilename)
    # A single parenthesised argument prints the same text as the statement form.
    print('Plotting %s...' % outfilename)
    gnuplot(script)

  return charts
214
def reclaimed_bytes(row):
  """Return 'total_size_before' minus 'total_size_after' of `row`."""
  before = row['total_size_before']
  after = row['total_size_after']
  return before - after
217
def other_scope(r):
  """Return the part of the pause not covered by mark, sweep and external.

  Records whose 'gc' is 's' (scavenges) have no 'other' scope and yield 0.
  """
  if r['gc'] != 's':
    remaining = r['pause']
    for phase in ('mark', 'sweep', 'external'):
      remaining = remaining - r[phase]
    return remaining
  # there is no 'other' scope for scavenging collections.
  return 0
223
def scavenge_scope(r):
  """Return pause minus external time for scavenges ('gc' == 's'), else 0."""
  if r['gc'] != 's':
    return 0
  return r['pause'] - r['external']
228
229
def real_mutator(r):
  """Return the 'mutator' value of `r` reduced by its 'stepstook' value."""
  steps = r['stepstook']
  return r['mutator'] - steps
232
# Charts to render, in output order: plot_all() numbers the generated images
# by position in this list. Every chart is a list of Set and Plot entries
# that are emitted into the gnuplot script in the order given.
plots = [
  # Pause time per collection, stacked by phase, including IGC steps.
  [
    Set('style fill solid 0.5 noborder'),
    Set('style histogram rowstacked'),
    Set('style data histograms'),
    Plot(Item('Scavenge', scavenge_scope, lc = 'green'),
         Item('Marking', 'mark', lc = 'purple'),
         Item('Sweep', 'sweep', lc = 'blue'),
         Item('External', 'external', lc = '#489D43'),
         Item('Other', other_scope, lc = 'grey'),
         Item('IGC Steps', 'stepstook', lc = '#FF6347'))
  ],
  # Pause time per collection, stacked by phase.
  # NOTE(review): 'external' is stacked twice in this chart (4th and 6th
  # item) -- confirm whether the last item was meant to be another field.
  [
    Set('style fill solid 0.5 noborder'),
    Set('style histogram rowstacked'),
    Set('style data histograms'),
    Plot(Item('Scavenge', scavenge_scope, lc = 'green'),
         Item('Marking', 'mark', lc = 'purple'),
         Item('Sweep', 'sweep', lc = 'blue'),
         Item('External', 'external', lc = '#489D43'),
         Item('Other', other_scope, lc = '#ADD8E6'),
         Item('External', 'external', lc = '#D3D3D3'))
  ],

  # Mutator time with the incremental steps subtracted (see real_mutator).
  [
    Plot(Item('Mutator', real_mutator, lc = 'black', style = 'lines'))
  ],
  # Heap size and holes before each GC (y2 axis) against the pause (y1 axis).
  [
    Set('style histogram rowstacked'),
    Set('style data histograms'),
    Plot(Item('Heap Size (before GC)', 'total_size_before', x1y2,
              fs = 'solid 0.4 noborder',
              lc = 'green'),
         # This series reads 'holes_size_before', so it is labelled
         # "before GC" (it used to carry the "after GC" title by mistake).
         Item('Total holes (before GC)', 'holes_size_before', x1y2,
              fs = 'solid 0.4 noborder',
              lc = 'red'),
         Item('GC Time', ['i', 'pause'], style = 'lines', lc = 'red'))
  ],
  # Heap size and holes after each GC (y2 axis) against the pause (y1 axis).
  [
    Set('style histogram rowstacked'),
    Set('style data histograms'),
    Plot(Item('Heap Size (after GC)', 'total_size_after', x1y2,
              fs = 'solid 0.4 noborder',
              lc = 'green'),
         Item('Total holes (after GC)', 'holes_size_after', x1y2,
              fs = 'solid 0.4 noborder',
              lc = 'red'),
         Item('GC Time', ['i', 'pause'],
              style = 'lines',
              lc = 'red'))
  ],
  # Allocated, reclaimed and promoted amounts per collection.
  [
    Set('style fill solid 0.5 noborder'),
    Set('style data histograms'),
    Plot(Item('Allocated', 'allocated'),
         Item('Reclaimed', reclaimed_bytes),
         Item('Promoted', 'promoted', style = 'lines', lc = 'black'))
  ],
]
292
def freduce(f, field, trace, init):
  """Fold `f` over the `field` value of every record in `trace`.

  Starts from `init` and repeatedly computes f(accumulator, record[field]) in
  trace order; returns `init` unchanged for an empty trace.

  Written as an explicit loop because the builtin reduce() this used to rely
  on does not exist on Python 3.
  """
  accumulator = init
  for record in trace:
    accumulator = f(accumulator, record[field])
  return accumulator
295
def calc_total(trace, field):
  """Return the sum of int(record[field]) over all records of `trace`.

  An empty trace yields 0. int() is used instead of the Python-2-only long():
  integers grow to arbitrary precision on their own, so large totals still do
  not overflow.
  """
  return sum(int(record[field]) for record in trace)
298
def calc_max(trace, field):
  """Return the largest `field` value in `trace`, but never less than 0.

  The fold starts from 0, so an empty trace (or one holding only negative
  values) yields 0.
  """
  # max() itself is the two-argument step function of the fold.
  return freduce(max, field, trace, 0)
301
def count_nonzero(trace, field):
  """Count the records of `trace` whose `field` value is not equal to 0."""
  def bump(count, value):
    if value == 0:
      return count
    return count + 1

  return freduce(bump, field, trace, 0)
304
305
def process_trace(filename):
  """Parse the GC trace `filename`, plot it and write an HTML report.

  The charts are rendered by plot_all() with `filename` as the image name
  prefix. The report, written to `filename + '.html'`, contains a table of
  per-phase pause statistics, throughput figures and the chart images.
  """
  trace = parse_gc_trace(filename)

  marksweeps = [r for r in trace if r['gc'] == 'ms']
  scavenges = [r for r in trace if r['gc'] == 's']
  globalgcs = [r for r in trace if r['gc'] != 's']

  charts = plot_all(plots, trace, filename)

  def stats(out, prefix, trace, field):
    """Write one table row: count, total, max, average and deviation."""
    n = len(trace)
    total = calc_total(trace, field)
    longest = calc_max(trace, field)
    # float() keeps the average (and hence the deviation) from being
    # truncated by integer division under Python 2.
    if n > 0:
      avg = float(total) / n
    else:
      avg = 0
    # Sample standard deviation; undefined for fewer than two records.
    if n > 1:
      squares = freduce(lambda t,r: t + (r - avg) ** 2, field, trace, 0)
      dev = math.sqrt(squares / (n - 1))
    else:
      dev = 0

    out.write('<tr><td>%s</td><td>%d</td><td>%d</td>'
              '<td>%d</td><td>%d [dev %f]</td></tr>' %
              (prefix, n, total, longest, avg, dev))

  def HumanReadable(size):
    """Format `size` (in bytes) with the largest fitting binary unit."""
    suffixes = ['bytes', 'kB', 'MB', 'GB']
    power = 1
    for suffix in suffixes[:-1]:
      if size < power*1024:
        return "%.1f" % (float(size) / power) + " " + suffix
      power *= 1024
    # Everything beyond the table is expressed in the largest unit instead of
    # falling off the loop and returning None.
    return "%.1f" % (float(size) / power) + " " + suffixes[-1]

  def throughput(out, name, trace):
    """Write the bytes-per-millisecond figures for `trace`, if it has pauses."""
    total_live_after = calc_total(trace, 'total_size_after')
    total_live_before = calc_total(trace, 'total_size_before')
    total_gc = calc_total(trace, 'pause')
    # Nothing to report (and nothing to divide by) without any pause time.
    if total_gc == 0:
      return
    out.write('GC %s Throughput (after): %s / %s ms = %s/ms<br/>' %
              (name,
               HumanReadable(total_live_after),
               total_gc,
               HumanReadable(total_live_after / total_gc)))
    out.write('GC %s Throughput (before): %s / %s ms = %s/ms<br/>' %
              (name,
               HumanReadable(total_live_before),
               total_gc,
               HumanReadable(total_live_before / total_gc)))


  with open(filename + '.html', 'w') as out:
    out.write('<html><body>')
    out.write('<table>')
    out.write('<tr><td>Phase</td><td>Count</td><td>Time (ms)</td>')
    out.write('<td>Max</td><td>Avg</td></tr>')
    stats(out, 'Total in GC', trace, 'pause')
    stats(out, 'Scavenge', scavenges, 'pause')
    stats(out, 'MarkSweep', marksweeps, 'pause')
    stats(out, 'Mark', [r for r in trace if r['mark'] != 0], 'mark')
    stats(out, 'Sweep', [r for r in trace if r['sweep'] != 0], 'sweep')
    stats(out,
          'External',
          [r for r in trace if r['external'] != 0],
          'external')
    out.write('</table>')
    throughput(out, 'TOTAL', trace)
    throughput(out, 'MS', marksweeps)
    throughput(out, 'OLDSPACE', globalgcs)
    out.write('<br/>')
    for chart in charts:
      out.write('<img src="%s">' % chart)
    # Close the document exactly once, after all images (this used to sit
    # inside the loop: repeated per chart and missing when there were none).
    out.write('</body></html>')

  print("%s generated." % (filename + '.html'))
384
# Script entry: exactly one argument, the GC trace file name, is expected.
if len(sys.argv) == 2:
  process_trace(sys.argv[1])
else:
  print("Usage: %s <GC-trace-filename>" % sys.argv[0])
  sys.exit(1)
390