#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
57dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch
"""Parses CSV output from the loading_measurement and outputs interesting stats.

Example usage:
$ tools/perf/run_measurement --browser=release \
    --output-format=csv --output=/path/to/loading_measurement_output.csv \
    loading_measurement tools/perf/page_sets/top_1m.py
$ tools/perf/measurements/loading_measurement_analyzer.py \
    --num-slowest-urls=100 --rank-csv-file=/path/to/top-1m.csv \
    /path/to/loading_measurement_output.csv
"""
167dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch
177dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdochimport collections
187dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdochimport csv
197dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdochimport heapq
207dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdochimport optparse
217dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdochimport os
22a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)import re
237dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdochimport sys
247dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch
257dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch
class LoadingMeasurementAnalyzer(object):
  """Parses loading_measurement CSV output and prints aggregate statistics.

  Each input row describes one URL. Numeric columns are bucketed by name into
  averages ('_avg' in the column name), maxima ('_max' in the column name) and
  totals (everything else); the totals of a row are also summed into that
  row's CPU time.
  """

  # Columns that are never treated as timing categories.
  _NON_CATEGORY_COLUMNS = (
      'url', 'load_time (ms)', 'dom_content_loaded_time (ms)')

  # URL schemes removed before a URL is looked up in the rank table.
  _URL_SCHEMES = ('http://', 'https://')

  def __init__(self, input_file, options):
    """Loads the optional rank table and then parses the measurement CSV.

    Args:
      input_file: Path of the CSV file to analyze.
      options: Parsed command-line options; the attributes read are
          num_slowest_urls, rank_csv_file, rank_limit, show_network,
          max_rows and display_zeros.
    """
    # Maps a URL (second column of the rank CSV) to its integer rank.
    self.ranks = {}
    # Each of these maps a column name to a list of (value, url) tuples.
    self.totals = collections.defaultdict(list)
    self.maxes = collections.defaultdict(list)
    self.avgs = collections.defaultdict(list)
    # Lists of (value, url) tuples, one entry per parsed row.
    self.load_times = []
    self.cpu_times = []
    # (network_time / load_time, url) tuples; only filled when
    # options.show_network is set.
    self.network_percents = []
    self.num_rows_parsed = 0
    self.num_slowest_urls = options.num_slowest_urls
    self._display_zeros = options.display_zeros
    # The rank table must be loaded first: rank filtering happens during
    # parsing of the input file.
    if options.rank_csv_file:
      self._ParseRankCsvFile(os.path.expanduser(options.rank_csv_file))
    self._ParseInputFile(input_file, options)

  @staticmethod
  def _SafeDivide(numerator, denominator):
    """Returns numerator / denominator, or 0.0 when denominator is zero."""
    if not denominator:
      return 0.0
    return float(numerator) / denominator

  def _ParseInputFile(self, input_file, options):
    """Reads the measurement CSV and accumulates per-category values.

    Rows whose rank exceeds options.rank_limit, or whose load time is
    negative, are skipped. Parsing stops after options.max_rows accepted
    rows when that option is set.

    Args:
      input_file: Path of the CSV file; it must have 'url' and
          'load_time (ms)' columns.
      options: Parsed command-line options (rank_limit, show_network and
          max_rows are read here).
    """
    with open(input_file, 'r') as csvfile:
      for row in csv.DictReader(csvfile):
        url = row['url']
        if options.rank_limit and self._GetRank(url) > options.rank_limit:
          continue
        load_time = float(row['load_time (ms)'])
        if load_time < 0:
          sys.stdout.write('Skipping %s due to negative load time\n' % url)
          continue
        cpu_time = 0
        for column, raw_value in row.items():
          if column in self._NON_CATEGORY_COLUMNS:
            continue
          # Empty cells and '-' placeholders carry no measurement.
          if not raw_value or raw_value == '-':
            continue
          value = float(raw_value)
          if not value:
            continue
          if '_avg' in column:
            self.avgs[column].append((value, url))
          elif '_max' in column:
            self.maxes[column].append((value, url))
          else:
            self.totals[column].append((value, url))
            cpu_time += value
        self.load_times.append((load_time, url))
        self.cpu_times.append((cpu_time, url))
        if options.show_network:
          network_time = load_time - cpu_time
          self.totals['Network (ms)'].append((network_time, url))
          # The ratio is undefined for a zero load time; leave such rows out
          # of the ranking instead of raising ZeroDivisionError.
          if load_time:
            self.network_percents.append((network_time / load_time, url))
        self.num_rows_parsed += 1
        if options.max_rows and self.num_rows_parsed == int(options.max_rows):
          break

  def _ParseRankCsvFile(self, input_file):
    """Fills self.ranks from a two-column CSV file of <rank,url> rows.

    Args:
      input_file: Path of the rank CSV file.
    """
    with open(input_file, 'r') as csvfile:
      for row in csv.reader(csvfile):
        assert len(row) == 2
        self.ranks[row[1]] = int(row[0])

  def _GetRank(self, url):
    """Returns the rank recorded for |url|.

    The 'http://' and 'https://' scheme markers are removed before the
    lookup. A URL missing from the rank table gets len(self.ranks).

    Args:
      url: The URL to look up.

    Returns:
      The integer rank of the URL.
    """
    for scheme in self._URL_SCHEMES:
      url = url.replace(scheme, '')
    return self.ranks.get(url, len(self.ranks))

  def PrintSummary(self, stdout):
    """Writes the aggregate statistics to |stdout|.

    Prints the overall totals, then one line per category ordered by total
    value, then (when num_slowest_urls is set) the slowest URLs per category
    and the URLs with the highest network share.

    Args:
      stdout: File-like object with a write() method receiving the report.

    Raises:
      AssertionError: If a totals column has no '(unit)' suffix or the
          columns do not all share the same unit.
    """
    def WriteLine(text=''):
      stdout.write(text + '\n')

    sum_totals = {}
    units = None
    for key, values in self.totals.items():
      m = re.match('.* [(](.*)[)]', key)
      assert m, 'All keys should have units.'
      assert not units or units == m.group(1), 'All units should be the same.'
      units = m.group(1)
      sum_totals[key] = sum(v[0] for v in values)
    total_cpu_time = sum(v[0] for v in self.cpu_times)
    total_page_load_time = sum(v[0] for v in self.load_times)

    # _SafeDivide keeps the report printable when no rows were parsed or
    # when every measured time is zero.
    WriteLine()
    WriteLine('Total URLs: %s' % self.num_rows_parsed)
    WriteLine('Total page load time: %ds' % int(round(
        total_page_load_time / 1000)))
    WriteLine('Average page load time: %dms' % int(round(
        self._SafeDivide(total_page_load_time, self.num_rows_parsed))))
    if units == 'ms':
      WriteLine('Total CPU time: %ds' % int(round(total_cpu_time / 1000)))
      WriteLine('Average CPU time: %dms' % int(round(
          self._SafeDivide(total_cpu_time, self.num_rows_parsed))))
    WriteLine()
    for key, value in sorted(sum_totals.items(), reverse=True,
                             key=lambda i: i[1]):
      # Entries are in descending order, so the first negligible total means
      # every remaining one is negligible as well.
      if not self._display_zeros and not int(value / 100.):
        break
      output_key = '%60s: ' % re.sub(' [(].*[)]', '', key)
      if units == 'ms':
        output_value = '%10ds ' % (value / 1000)
        output_percent = '%.1f%%' % self._SafeDivide(
            100 * value, total_page_load_time)
      else:
        output_value = '%10d%s ' % (value, units)
        output_percent = '%.1f%%' % self._SafeDivide(
            100 * value, total_cpu_time)
      WriteLine('%s %s %s' % (output_key, output_value, output_percent))

    if not self.num_slowest_urls:
      return

    for key, values in sorted(self.totals.items(), reverse=True,
                              key=lambda i: sum_totals[i[0]]):
      if not self._display_zeros and not int(sum_totals[key] / 100.):
        break
      WriteLine()
      WriteLine('Top %d slowest %s:' % (self.num_slowest_urls,
                                        re.sub(' [(].*[)]', '', key)))
      slowest = heapq.nlargest(self.num_slowest_urls, values)
      for value, url in slowest:
        WriteLine('%10d%s\t%s (#%s)' % (value, units, url,
                                        self._GetRank(url)))

    if self.network_percents:
      WriteLine()
      WriteLine('Top %d highest network to CPU time ratios:' % (
          self.num_slowest_urls))
      ranked = sorted(self.network_percents, reverse=True)
      for percent, url in ranked[:self.num_slowest_urls]:
        # No space after the tab: this matches what the Python 2 print
        # statement emitted for a leading '\t' item.
        WriteLine('\t%.1f%% %s (#%s)' % (percent * 100, url,
                                         self._GetRank(url)))
152a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch
153a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch
def main(arguments, stdout=sys.stdout):
  """Parses the command line and prints the analysis of one CSV file.

  Args:
    arguments: Command-line arguments, without the program name.
    stdout: File-like object that receives the summary report.

  Returns:
    0 on success, or 1 when --rank-limit is passed without --rank-csv-file.

  Raises:
    AssertionError: If the number of positional arguments is not exactly one.
  """
  prog_desc = 'Parses CSV output from the loading_measurement'
  parser = optparse.OptionParser(usage=('%prog [options]' + '\n\n' + prog_desc))

  parser.add_option('--max-rows', type='int',
                    help='Only process this many rows')
  parser.add_option('--num-slowest-urls', type='int',
                    help='Output this many slowest URLs for each category')
  parser.add_option('--rank-csv-file', help='A CSV file of <rank,url>')
  parser.add_option('--rank-limit', type='int',
                    help='Only process pages higher than this rank')
  parser.add_option('--show-network', action='store_true',
                    help='Whether to display Network as a category')
  parser.add_option('--display-zeros', action='store_true',
                    help='Whether to display categories with zero time')

  options, args = parser.parse_args(arguments)

  assert len(args) == 1, 'Must pass exactly one CSV file to analyze'
  if options.rank_limit and not options.rank_csv_file:
    # Written with write() rather than a print statement so the module
    # parses under both Python 2 and Python 3; the output is unchanged.
    sys.stdout.write('Must pass --rank-csv-file with --rank-limit\n')
    return 1

  LoadingMeasurementAnalyzer(args[0], options).PrintSummary(stdout)

  return 0
1807dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch
1817dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch
if __name__ == '__main__':
  # Use main()'s return value as the process exit status.
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)
184