1#!/usr/bin/env python
2# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
# A script to accumulate values from the 'dmprof cat' command into CSV, JSON,
# or a human-readable tree.
7#
8# Usage:
9#   ./accumulate.py -f <format> -t <template-name> < input.json > output
10#
# <format> is one of "csv", "json", and "tree". If "csv" or "json" is given,
# accumulate.py dumps a file similar to the output of "dmprof csv|json". If
# "tree" is given, accumulate.py dumps a human-readable breakdown tree.
14#
15# <template-name> is a label in templates.json.
16
17import datetime
18import json
19import logging
20import optparse
21import sys
22
23from lib.ordered_dict import OrderedDict
24
25
# Module-level logger for this script; main() attaches a stream handler to it.
LOGGER = logging.getLogger('dmprof-accumulate')
27
28
def visit_in_template(template, snapshot, depth):
  """Visits all categories via a given template.

  This function is not used. It's a sample function to traverse a template.
  It writes every rule name of the template's breakdown to standard output,
  one per line, indented by two spaces per nesting level, and recurses into
  rules that the template breaks down further.

  Args:
      template: A template indexed as (world, breakdown, rules), where |rules|
          maps a rule name to the sub-template that breaks it down further.
      snapshot: A dict of worlds; snapshot[world]['breakdown'][breakdown] maps
          rule names to their matches.
      depth: The current nesting level, used only for indentation.
  """
  world = template[0]
  breakdown = template[1]
  rules = template[2]

  # Only the rule names are needed, so iterate the mapping's keys directly.
  # This (unlike .iteritems()) works on both Python 2 and Python 3.
  for rule in snapshot[world]['breakdown'][breakdown]:
    # An explicit write behaves the same on Python 2 and Python 3, whereas
    # "print (...) + rule" evaluates to None + str under Python 3.
    sys.stdout.write(('  ' * depth) + rule + '\n')
    if rule in rules:
      visit_in_template(rules[rule], snapshot, depth + 1)
42
43
def accumulate(template, snapshot, units_dict, target_units):
  """Accumulates units in a JSON |snapshot| with applying a given |template|.

  Rules whose match has a truthy 'hidden' entry are skipped entirely: they
  contribute neither to the tree nor to the total, and their units stay in
  the returned remainder.

  Args:
      template: A template tree included in a dmprof cat JSON file. It is
          indexed as (world, breakdown, rules), where |rules| maps a rule
          name to the sub-template that breaks that rule down further.
      snapshot: A dict of worlds of a snapshot in a dmprof cat JSON file;
          snapshot[world]['breakdown'][breakdown] maps rule names to matches.
      units_dict: A dict of units in worlds; units_dict[world][unit_id] is
          the size of that unit.
      target_units: A set of unit ids which are a target of this accumulation.

  Returns:
      A tuple (category_tree, total, remainder_units):
        category_tree: An OrderedDict mapping each rule to its size (leaf) or
            to a nested OrderedDict (broken-down rule). In a nested dict the
            key None holds the size not explained by the sub-breakdown.
        total: The sum of the sizes matched by all non-hidden rules.
        remainder_units: The subset of |target_units| matched by no
            non-hidden rule.
  """
  world = template[0]
  breakdown = template[1]
  rules = template[2]

  remainder_units = target_units.copy()
  category_tree = OrderedDict()
  total = 0

  # .items() (rather than .iteritems()) works on both Python 2 and Python 3.
  for rule, match in snapshot[world]['breakdown'][breakdown].items():
    if match.get('hidden'):
      continue
    matched_units = set(match['units']).intersection(target_units)
    subtotal = sum(units_dict[world][unit_id] for unit_id in matched_units)
    total += subtotal
    remainder_units = remainder_units.difference(matched_units)
    if rule not in rules:
      # A category matched with |rule| is a leaf of the breakdown tree.
      # It is NOT broken down more.
      category_tree[rule] = subtotal
      continue

    # A category matched with |rule| is broken down more.
    subtemplate = rules[rule]
    subworld = subtemplate[0]
    subbreakdown = subtemplate[1]

    if subworld == world:
      # Break down in the same world: consider units.
      category_tree[rule], accounted_total, subremainder_units = accumulate(
          subtemplate, snapshot, units_dict, matched_units)
      subremainder_total = 0
      if subremainder_units:
        subremainder_total = sum(
            units_dict[world][unit_id] for unit_id in subremainder_units)
        category_tree[rule][None] = subremainder_total
      # The sums can disagree when a unit is matched by several sub-rules.
      if subtotal != accounted_total + subremainder_total:
        sys.stderr.write(
            'WARNING: Sum of %s:%s is different from %s by %d bytes.\n' % (
                subworld, subbreakdown, rule,
                subtotal - (accounted_total + subremainder_total)))
    else:
      # Break down in a different world: consider only the total size.
      # Unit ids are not comparable across worlds, so every unit of the
      # sub-world is a target.
      category_tree[rule], accounted_total, _ = accumulate(
          subtemplate, snapshot, units_dict, set(units_dict[subworld].keys()))
      if subtotal >= accounted_total:
        category_tree[rule][None] = subtotal - accounted_total
      else:
        sys.stderr.write(
            'WARNING: Sum of %s:%s is larger than %s by %d bytes.\n' % (
                subworld, subbreakdown, rule, accounted_total - subtotal))
        sys.stderr.write(
            'WARNING:   Assuming remainder of %s is 0.\n' % rule)
        category_tree[rule][None] = 0

  return category_tree, total, remainder_units
110
111
def flatten(category_tree, header=''):
  """Flattens a category tree into a flat list.

  Args:
      category_tree: A dict (or OrderedDict) mapping a rule to either a size
          or a nested category tree.
      header: The '>'-joined label of the enclosing categories; empty at the
          top level.

  Returns:
      A list of (label, size) tuples, one per leaf, in traversal order. A
      label joins the rule names on the path to the leaf with '>'; a falsy
      rule name (such as the None remainder key) is spelled 'remaining'.
  """
  result = []
  # .items() (rather than .iteritems()) works on both Python 2 and Python 3.
  for rule, sub in category_tree.items():
    if not rule:
      rule = 'remaining'
    if header:
      flattened_rule = header + '>' + rule
    else:
      flattened_rule = rule
    if isinstance(sub, (dict, OrderedDict)):
      result.extend(flatten(sub, flattened_rule))
    else:
      result.append((flattened_rule, sub))
  return result
127
128
def print_category_tree(category_tree, output, depth=0):
  """Prints a category tree in a human-readable format.

  Each entry is written on its own line as two spaces per |depth| level, one
  separating space, and then either 'label:' (for a nested tree, whose
  entries follow one level deeper) or 'label: size' (for a leaf).

  Args:
      category_tree: A dict (or OrderedDict) mapping a label to either a size
          or a nested category tree.
      output: A file-like object with a write() method.
      depth: The nesting level of |category_tree|, used for indentation.
  """
  for label in category_tree:
    # The extra ' ' reproduces the separator that the Python 2 statement
    # "print >> output, indent," emitted before the next printed item.
    prefix = ('  ' * depth) + ' '
    entry = category_tree[label]
    if isinstance(entry, (dict, OrderedDict)):
      output.write(prefix + '%s:' % label + '\n')
      print_category_tree(entry, output, depth + 1)
    else:
      output.write(prefix + '%s: %d' % (label, entry) + '\n')
139
140
def flatten_all_category_trees(category_trees):
  """Flattens every category tree and collects the labels they use.

  Args:
      category_trees: An iterable of category trees accepted by flatten().

  Returns:
      A tuple (labels, table): |labels| is the set of all flattened labels
      seen in any tree, and |table| is a list holding, for each tree in
      order, an OrderedDict mapping its flattened labels to sizes.
  """
  all_labels = set()
  table = []
  for tree in category_trees:
    # Building from the pair list keeps first-seen order and last-seen value,
    # just like assigning the pairs one by one.
    row = OrderedDict(flatten(tree))
    all_labels.update(row)
    table.append(row)
  return all_labels, table
151
152
def output_csv(output, category_trees, data, first_time, output_exponent):
  """Writes the accumulated category trees to |output| as CSV.

  The first line is a header: 'second' followed by every flattened label in
  sorted order. Each following line describes one snapshot: its time relative
  to |first_time|, then the size of each label ('0' when the snapshot has no
  such label).

  Args:
      output: A file-like object with a write() method.
      category_trees: A list of category trees, one per snapshot.
      data: The parsed dmprof cat JSON; data['snapshots'][i]['time'] is read
          for the i-th category tree.
      first_time: The time subtracted from every snapshot time.
      output_exponent: 'K' or 'M' (case-insensitive) to divide sizes by 1024
          or 1024 * 1024; any other value leaves sizes unscaled.
  """
  flattened_labels, flattened_table = flatten_all_category_trees(category_trees)

  # The divisor is the same for every cell, so work it out once. None means
  # "do not divide", which keeps byte counts as integers on Python 3 too
  # (where int / 1 would otherwise turn into a float).
  exponent = output_exponent.upper()
  if exponent == 'K':
    divisor = 1024.0
  elif exponent == 'M':
    divisor = 1024.0 * 1024.0
  else:
    divisor = None

  sorted_flattened_labels = sorted(flattened_labels)
  output.write(','.join(['second'] + sorted_flattened_labels) + '\n')
  for index, row in enumerate(flattened_table):
    values = [str(data['snapshots'][index]['time'] - first_time)]
    for label in sorted_flattened_labels:
      if label not in row:
        values.append('0')
      elif divisor is None:
        values.append(str(row[label]))
      else:
        values.append(str(row[label] / divisor))
    output.write(','.join(values) + '\n')
171
172
def output_json(output, category_trees, data, first_time, template_label):
  """Dumps the accumulated category trees to |output| as a JSON document.

  The document has a 'version' of 'JSON_DEEP_2' and a single policy named
  |template_label|. That policy lists the sorted flattened labels as
  'legends' and one entry per snapshot under 'snapshots'. Each entry holds
  the snapshot's flattened sizes plus 'second' (time relative to
  |first_time|) and 'dump_time' (the snapshot time formatted as
  'YYYY-MM-DD HH:MM:SS' by datetime.fromtimestamp).

  Args:
      output: A file-like object that json.dump() can write to.
      category_trees: A list of category trees, one per snapshot.
      data: The parsed dmprof cat JSON; data['snapshots'][i]['time'] is read
          for the i-th category tree.
      first_time: The time subtracted from every snapshot time.
      template_label: The name under which the policy is stored.
  """
  legends, table = flatten_all_category_trees(category_trees)

  entries = []
  for position, flat_row in enumerate(table):
    entry = flat_row.copy()
    dump_timestamp = data['snapshots'][position]['time']
    entry['second'] = dump_timestamp - first_time
    dumped_at = datetime.datetime.fromtimestamp(dump_timestamp)
    entry['dump_time'] = dumped_at.strftime('%Y-%m-%d %H:%M:%S')
    entries.append(entry)

  policy = {'legends': sorted(legends), 'snapshots': entries}
  document = {'version': 'JSON_DEEP_2', 'policies': {template_label: policy}}
  json.dump(document, output, indent=2, sort_keys=True)
193
194
def output_tree(output, category_trees):
  """Writes every category tree to |output| as a human-readable tree.

  Each tree is preceded by a '< Snapshot #N >' heading (N is the zero-based
  index of the tree), printed one level deep, and followed by an empty line.

  Args:
      output: A file-like object with a write() method.
      category_trees: A list of category trees, one per snapshot.
  """
  for index, category_tree in enumerate(category_trees):
    # Explicit writes replace the Python 2-only "print >> output" statement.
    output.write('< Snapshot #%d >' % index + '\n')
    print_category_tree(category_tree, output, 1)
    output.write('\n')
200
201
def do_main(cat_input, output, template_label, output_format, output_exponent):
  """Does the main work: accumulate for every snapshot and print a result.

  Args:
      cat_input: A file-like object to read the dmprof cat JSON from.
      output: A file-like object to write the result to.
      template_label: The name of the template to apply. If it is falsy, the
          'default_template' named in the input is used.
      output_format: One of 'csv', 'json' and 'tree'.
      output_exponent: One of 'B', 'K' and 'M' (case-insensitive). It is only
          passed on to the CSV output.

  Raises:
      NotImplementedError: |output_format| or |output_exponent| is not one of
          the supported values. Both are checked before any input is read.
  """
  if output_format not in ['csv', 'json', 'tree']:
    raise NotImplementedError('The output format \"%s\" is not implemented.' %
                              output_format)

  if output_exponent.upper() not in ['B', 'K', 'M']:
    raise NotImplementedError('The exponent \"%s\" is not implemented.' %
                              output_exponent)

  # OrderedDict keeps rules in the order they appear in the input.
  data = json.loads(cat_input.read(), object_pairs_hook=OrderedDict)

  templates = data['templates']
  if not template_label:
    template_label = data['default_template']
  if template_label not in templates:
    # An unknown template is logged and nothing is written to |output|.
    LOGGER.error('A template \'%s\' is not found.', template_label)
    return
  template = templates[template_label]

  category_trees = []
  first_time = None

  for snapshot in data['snapshots']:
    # Compare against None so that a first snapshot whose time is 0 is still
    # latched as the reference time.
    if first_time is None:
      first_time = snapshot['time']

    # units[world][unit_id] is the first size recorded for that unit; unit
    # ids are JSON object keys (strings), so convert them to ints.
    units = {}
    for world_name in snapshot['worlds']:
      world_units = {}
      # .items() (rather than .iteritems()) works on Python 2 and Python 3.
      for unit_id, sizes in snapshot['worlds'][world_name]['units'].items():
        world_units[int(unit_id)] = sizes[0]
      units[world_name] = world_units

    # Every unit of the template's top-level world is a target.
    category_tree, _, _ = accumulate(
        template, snapshot['worlds'], units, set(units[template[0]].keys()))
    category_trees.append(category_tree)

  if output_format == 'csv':
    output_csv(output, category_trees, data, first_time, output_exponent)
  elif output_format == 'json':
    output_json(output, category_trees, data, first_time, template_label)
  elif output_format == 'tree':
    output_tree(output, category_trees)
246
247
def main():
  """Parses the command line and runs do_main() on stdin and stdout.

  Log messages of level INFO and above are written, message text only, by a
  logging.StreamHandler attached to LOGGER.
  """
  LOGGER.setLevel(logging.DEBUG)
  handler = logging.StreamHandler()
  handler.setLevel(logging.INFO)
  formatter = logging.Formatter('%(message)s')
  handler.setFormatter(formatter)
  LOGGER.addHandler(handler)

  parser = optparse.OptionParser()
  parser.add_option('-t', '--template', dest='template',
                    metavar='TEMPLATE',
                    help='Apply TEMPLATE to list up.')
  parser.add_option('-f', '--format', dest='format', default='csv',
                    help='Specify the output format: csv, json or tree.')
  parser.add_option('-e', '--exponent', dest='exponent', default='M',
                    help='Specify B (bytes), K (kilobytes) or M (megabytes).')

  # parse_args() defaults to sys.argv[1:], so the program name is no longer
  # parsed as if it were a positional argument. Positional arguments are
  # ignored.
  options, _ = parser.parse_args()
  do_main(sys.stdin, sys.stdout,
          options.template, options.format, options.exponent)
268
269
# Run main() only when this file is executed as a script, and pass its return
# value to sys.exit() as the exit status.
if __name__ == '__main__':
  sys.exit(main())
272