plot_sdcard.py revision 906d825d7033339bb37d7415bce77089bec3a2b6
1#!/usr/bin/python2.5
2#
3# Copyright 2009, The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17
18"""plot_sdcard: A module to plot the results of an sdcard perf test.
19
20Requires Gnuplot python v 1.8
21
22Typical usage:
23 -t x axis is time
24 -i x axis is iteration
25 -p profile data generated by profile_sdcard.sh
26
27./plot_sdcard.py -t /tmp/data.txt
28./plot_sdcard.py -i /tmp/data.txt
29./plot_sdcard.py -p
30
31python interpreter
32>>> import plot_sdcard as p
33>>> (metadata, data) = p.Parse('/tmp/data.txt')
34>>> p.PlotIterations(metadata, data)
35>>> p.PlotTimes(metadata, data)
36
37"""
38
39import getopt
40from itertools import izip
41import re
42import sys
43import Gnuplot
44import numpy
45
46
class DataSet(object):
  """Samples of one StopWatch: its header summary and (time, value) pairs.

  Attributes:
    time: List of x values, in insertion order.
    data: List of y values, parallel to `time`.
    name: StopWatch name taken from the header line.
    duration: Total/cumulative duration taken from the header line (float).
    iteration: Sample count taken from the header line (int).
    summary: Match object when `name` starts with '<lowercase/underscore>_total',
      None otherwise.
  """

  def __init__(self, line):
    """Builds an empty dataset from a '# StopWatch ...' header line.

    Args:
      line: Header such as
        '# StopWatch write total/cumulative duration 2.5. Samples: 10'.

    Raises:
      ValueError: If `line` is not a well formed StopWatch header.
    """
    res = re.search(r'# StopWatch ([\w]+) total/cumulative '
                    r'duration ([0-9.]+). Samples: ([0-9]+)', line)
    if res is None:
      # Fail with the offending text instead of an AttributeError on None.
      raise ValueError('Malformed StopWatch header: %r' % line)
    self.time = []
    self.data = []
    self.name = res.group(1)
    self.duration = float(res.group(2))
    self.iteration = int(res.group(3))
    self.summary = re.match(r'([a-z_]+)_total', self.name)

  def __repr__(self):
    # list() so the pairs are shown on Python 3 too, where zip() is lazy.
    return str(list(zip(self.time, self.data)))

  def Add(self, time, value):
    """Appends one (time, value) sample."""
    self.time.append(time)
    self.data.append(value)

  def RescaleTo(self, length):
    """Shrinks the series to about `length` points by averaging in place.

    Consecutive groups of `factor = len(data) // length` samples are replaced
    by one sample holding the group's mean value and the time of its last
    member. Trailing samples that do not fill a whole group are dropped.
    Nothing happens when `length` is not positive or when the series has fewer
    than 2 * length samples.

    Args:
      length: Desired number of points.
    """
    if length <= 0:
      # Nothing sensible to rescale to; also avoids dividing by zero.
      return

    # Floor division: the factor must be an integer on Python 2 and 3 alike.
    factor = len(self.data) // length
    if factor <= 1:
      return

    new_time = []
    new_data = []
    accum = 0.0
    for idx, (t, d) in enumerate(zip(self.time, self.data)):
      accum += d
      if (idx + 1) % factor == 0:
        new_time.append(t)
        new_data.append(accum / factor)
        accum = 0.0  # stay in float so every group is averaged the same way
    self.time = new_time
    self.data = new_data
84
85
class Metadata(object):
  """Run-wide information collected from the '#' header lines of a data file.

  Attributes:
    kernel: Kernel version string taken from the '# Kernel:' line.
    command_line: Condensed test arguments taken from the '# Command:' line.
    sched: First word of the '# Sched features:' line.
    name: Name of the last dataset passed to UpdateWith.
    fadvise: Value of the '# Fadvise:' line.
    iterations: Value of the '# Iterations:' line.
    duration: Largest duration seen by UpdateWith.
    complete: True once the '# Sched' line has been parsed.
  """

  def __init__(self):
    self.kernel = ''
    self.command_line = ''
    self.sched = ''
    self.name = ''
    self.fadvise = ''
    self.iterations = 0
    self.duration = 0.0
    self.complete = False

  @staticmethod
  def _Extract(pattern, line):
    """Returns group 1 of `pattern` searched in `line`.

    Raises:
      ValueError: If `pattern` is not found in `line`.
    """
    found = re.search(pattern, line)
    if found is None:
      # Report the offending text instead of an AttributeError on None.
      raise ValueError('Malformed metadata line: %r' % line)
    return found.group(1)

  def Parse(self, line):
    """Updates the field described by `line`; unrecognized lines are ignored.

    Args:
      line: One stripped line of the data file.

    Raises:
      ValueError: If a recognized header does not have the expected format.
    """
    if line.startswith('# Kernel:'):
      self.kernel = self._Extract(r'Linux version ([0-9.]+-[^ ]+)', line)
    elif line.startswith('# Command:'):
      command = self._Extract(r'# Command: [/\w_]+ (.*)', line)
      # Shorten the arguments so that they fit in the graph's title.
      command = command.replace(' --', '-')
      command = command.replace(' -d', '')
      self.command_line = command.replace('--test=', '')
    elif line.startswith('# Iterations'):
      self.iterations = int(self._Extract(r'# Iterations: ([0-9]+)', line))
    elif line.startswith('# Fadvise'):
      self.fadvise = self._Extract(r'# Fadvise: ([\w]+)', line)
    elif line.startswith('# Sched'):
      self.sched = self._Extract(r'# Sched features: ([\w]+)', line)
      # The sched line marks the end of the metadata section.
      self.complete = True

  def AsTitle(self):
    """Returns the graph title; '\\n' is emitted literally for gnuplot."""
    return '%s-duration:%f\\n-%s\\n%s' % (
        self.kernel, self.duration, self.command_line, self.sched)

  def UpdateWith(self, dataset):
    """Records the name and the longest duration of summary datasets.

    Args:
      dataset: Object exposing `duration` and `name` attributes.
    """
    self.duration = max(self.duration, dataset.duration)
    self.name = dataset.name
120
121
def _StoreDataSet(metadata, data, dataset):
  """Files a finished dataset: summaries update metadata, others are plotted."""
  if dataset is None:
    return
  if dataset.summary:
    metadata.UpdateWith(dataset)
  else:
    data.append(dataset)


def Parse(filename):
  """Parse a file with the collected data.

  The file starts with '#' metadata lines, followed by one section per
  StopWatch: a '# StopWatch ...' header then samples in 2 columns (x, y).
  Blank lines, other '#' lines and lines starting with a lowercase letter or
  an underscore are ignored. Exits with status 1 when the metadata section is
  incomplete.

  Args:
    filename: Full path to the file.

  Returns:
    A (metadata, data) tuple: the Metadata of the run and the list of
    non-summary DataSet objects, in file order.
  """
  metadata = Metadata()
  data = []  # array of dataset
  dataset = None

  f = open(filename, 'r')
  try:  # try/finally rather than 'with' to stay compatible with python 2.5
    for num, line in enumerate(f):
      try:
        line = line.strip()
        if not line: continue

        if not metadata.complete:
          metadata.Parse(line)
          continue

        if re.match('[a-z_]', line):
          continue

        if line.startswith('# StopWatch'):  # Start of a new dataset
          _StoreDataSet(metadata, data, dataset)
          dataset = DataSet(line)
          continue

        if line.startswith('#'):
          continue

        # must be data at this stage
        try:
          (time, value) = line.split(None, 1)
        except ValueError:
          print('skipping line %d: %s' % (num, line))
          continue

        if dataset and not dataset.summary:
          dataset.Add(float(time), float(value))

      except Exception:
        print('Error parsing line %d %s' % (num, sys.exc_info()[0]))
        raise
  finally:
    f.close()

  # The last section gets the same treatment as the others: nothing is stored
  # when the file had no StopWatch section, and a trailing summary updates the
  # metadata instead of being plotted.
  _StoreDataSet(metadata, data, dataset)
  if not metadata.complete:
    print("""Error missing metadata. Did you mount debugfs?
    [adb shell mount -t debugfs none /sys/kernel/debug]""")
    sys.exit(1)
  return (metadata, data)
181
182
def PlotIterations(metadata, data):
  """Plot the duration of the ops against the iteration number.

  Each dataset is first rescaled in place to metadata.iterations points.
  Datasets sharing a name share a line style, and only the first of them
  contributes a legend entry. The graph is also saved as a png under /tmp.

  If you are plotting data with widely different runtimes you probably want to
  use PlotTimes instead: when readers and writers are in the same mix, the
  readers go through their iterations much faster than the writers, so the
  final iterations of the writers are likely done without any influence from
  the readers.

  Args:
    metadata: Provides the graph's title, the iteration count and the fields
      used to build the png file name.
    data: Sequence of datasets to be plotted.
  """
  plot = Gnuplot.Gnuplot(persist=1)
  plot('set data style lines')
  plot.clear()
  plot.xlabel('iterations')
  plot.ylabel('duration in second')
  plot.title(metadata.AsTitle())

  style_by_name = {}
  next_style = 1
  for series in data:
    series.RescaleTo(metadata.iterations)
    x_values = numpy.arange(len(series.data), dtype='int_')

    options = {}
    if series.name not in style_by_name:
      style_by_name[series.name] = next_style
      next_style += 1
      # Only the first curve of a given name gets a legend entry.
      options['title'] = series.name
    options['with_'] = 'lines ls %d' % style_by_name[series.name]

    plot.replot(Gnuplot.Data(x_values, series.data, **options))

  png_path = '/tmp/%s-%s-%f.png' % (
      metadata.name, metadata.kernel, metadata.duration)
  plot.hardcopy(png_path, terminal='png')
226
227
def PlotTimes(metadata, data):
  """Plot the duration of the ops against the time elapsed.

  Datasets sharing a name share an impulse style, and only the first of them
  contributes a legend entry. The graph is also saved as a png under /tmp.

  Args:
    metadata: Provides the graph's title and the fields used to build the png
      file name.
    data: Sequence of datasets to be plotted.
  """
  plot = Gnuplot.Gnuplot(persist=1)
  plot('set data style impulses')
  plot('set xtics 1')
  plot.clear()
  plot.xlabel('seconds')
  plot.ylabel('duration in second')
  plot.title(metadata.AsTitle())

  style_by_name = {}
  next_style = 1
  for series in data:
    x_values = numpy.array(series.time, dtype='float_')

    options = {}
    if series.name not in style_by_name:
      style_by_name[series.name] = next_style
      next_style += 1
      # Only the first curve of a given name gets a legend entry.
      options['title'] = series.name
    options['with_'] = 'impulses ls %d' % style_by_name[series.name]

    plot.replot(Gnuplot.Data(x_values, series.data, **options))

  png_path = '/tmp/%s-%s-%f.png' % (
      metadata.name, metadata.kernel, metadata.duration)
  plot.hardcopy(png_path, terminal='png')
262
263
def PlotProfile():
  """Plot the time of a run against the number of writer processes.

  Reads /tmp/sdcard-scalability.txt, plots its first dataset with linespoints
  and saves the graph as a png under /tmp.
  """
  (metadata, data) = Parse('/tmp/sdcard-scalability.txt')

  plot = Gnuplot.Gnuplot(persist=1)
  for command in ('set data style impulses',
                  'set xtics 1',
                  'set pointsize 2'):
    plot(command)
  plot.clear()
  plot.xlabel('writer process')
  plot.ylabel('duration in second')
  plot.title(metadata.AsTitle())

  profile = data[0]
  x_values = numpy.array(profile.time, dtype='int_')
  curve = Gnuplot.Data(x_values, profile.data,
                       title=profile.name,
                       with_='linespoints')
  plot.replot(curve)

  png_path = '/tmp/%s-%s-%f.png' % (
      metadata.name, metadata.kernel, metadata.duration)
  plot.hardcopy(png_path, terminal='png')
285
286
def Usage():
  """Print this module's usage on stdout and exit with status 2."""
  # Single-argument print(...) behaves the same on Python 2 and Python 3.
  print("""
  To plot the result using the iter number of the x axis:

    plot_sdcard.py -i /tmp/data.txt

  To plot the result using time for the x axis:

    plot_sdcard.py -t /tmp/data.txt

  To plot the result from the profiler:

    profile_sdcard.sh
    plot_sdcard.py -p

  """)
  sys.exit(2)
305
306
def main(argv):
  """Parses the command line and draws the requested plot.

  Only the first recognized flag is honored. The process always exits: with
  status 0 after plotting, or through Usage() (status 2) when the options are
  invalid, no plot flag is given, or the data file argument is missing.

  Args:
    argv: Full argument vector, program name first (typically sys.argv).
  """
  try:
    (optlist, args) = getopt.getopt(argv[1:],
                                    'itp', ['iteration', 'time', 'profile'])
  except getopt.GetoptError:
    # sys.exc_info() instead of 'except X, err': valid on python 2.5 and 3.
    print(str(sys.exc_info()[1]))
    Usage()

  for flag, unused_val in optlist:
    if flag in ('-p', '--profile'):
      PlotProfile()
      sys.exit(0)

    if flag in ('-i', '--iteration'):
      plotter = PlotIterations
    elif flag in ('-t', '--time'):
      plotter = PlotTimes
    else:
      continue

    if not args:
      # Show the usage instead of failing with an IndexError on args[0].
      print('Missing data file for option %s' % flag)
      Usage()
    (metadata, data) = Parse(args[0])
    plotter(metadata, data)
    sys.exit(0)
  Usage()
328
329
# Script entry point: hand the full argument vector (program name included)
# to main(), which terminates the process itself.
if __name__ == '__main__':
  main(argv=sys.argv)
332