# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import datetime
import json
import logging
import sys

from lib.pageframe import PFNCounts
from lib.policy import PolicySet
from lib.subcommand import SubCommand


LOGGER = logging.getLogger('dmprof')


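# A sketch of typical invocations, following the usage string below (the
# dump file names are hypothetical):
#
#   dmprof csv -p POLICY dmprof.12345.0001.heap > result.csv
#   dmprof json dmprof.12345.0001.heap > result.json
#   dmprof list dmprof.12345.0001.heap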
class PolicyCommands(SubCommand):
  def __init__(self, command):
    super(PolicyCommands, self).__init__(
        'Usage: %%prog %s [-p POLICY] <first-dump> [shared-first-dumps...]' %
        command)
    self._parser.add_option('-p', '--policy', type='string', dest='policy',
                            help='profile with POLICY', metavar='POLICY')
    self._parser.add_option('--alternative-dirs', dest='alternative_dirs',
                            metavar='/path/on/target@/path/on/host[:...]',
                            help='Read files in /path/on/host/ instead of '
                                 'files in /path/on/target/.')
    self._parser.add_option('--timestamp', dest='timestamp',
                            action='store_true',
                            help='Use an absolute timestamp for the "second" '
                                 'component instead of elapsed seconds.')
    self._timestamp = False

  def _set_up(self, sys_argv):
    """Parses command-line arguments and loads dumps and policies.

    Returns:
        A quadruple of (policy_set, dumps, pfn_counts_dict, bucket_set).
    """
    options, args = self._parse_args(sys_argv, 1)
    dump_path = args[1]
    shared_first_dump_paths = args[2:]
    alternative_dirs_dict = {}
    if options.alternative_dirs:
      for alternative_dir_pair in options.alternative_dirs.split(':'):
        target_path, host_path = alternative_dir_pair.split('@', 1)
        alternative_dirs_dict[target_path] = host_path
    (bucket_set, dumps) = SubCommand.load_basic_files(
        dump_path, True, alternative_dirs=alternative_dirs_dict)

    self._timestamp = options.timestamp

    pfn_counts_dict = {}
    for shared_first_dump_path in shared_first_dump_paths:
      shared_dumps = SubCommand._find_all_dumps(shared_first_dump_path)
      for shared_dump in shared_dumps:
        pfn_counts = PFNCounts.load(shared_dump)
        if pfn_counts.pid not in pfn_counts_dict:
          pfn_counts_dict[pfn_counts.pid] = []
        pfn_counts_dict[pfn_counts.pid].append(pfn_counts)

    policy_set = PolicySet.load(SubCommand._parse_policy_list(options.policy))
    return policy_set, dumps, pfn_counts_dict, bucket_set

  def _apply_policy(self, dump, pfn_counts_dict, policy, bucket_set,
                    first_dump_time):
    """Aggregates the total memory size of each component.

    Iterates through all stacktraces and attributes them to one of the
    components based on the policy.  Note that the policy rules must be
    applied in the right order.

    Args:
        dump: A Dump object.
        pfn_counts_dict: A dict mapping a pid to a list of PFNCounts.
        policy: A Policy object.
        bucket_set: A BucketSet object.
        first_dump_time: An integer representing the time when the first
            dump was taken.

    Returns:
        A dict mapping each component name to its aggregated size.
    """
    LOGGER.info('  %s' % dump.path)
    all_pfn_dict = {}
    if pfn_counts_dict:
      LOGGER.info('    shared with...')
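      # For each sharing process, pick the PFNCounts closest in time to this
      # dump: the scan below keeps the last set dumped less than 3 seconds
      # after the dump, and gives up if it sees a pre-dump 'Exiting' set
      # (the process had already exited).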
      for pid, pfnset_list in pfn_counts_dict.iteritems():
        closest_pfnset_index = None
        closest_pfnset_difference = 1024.0  # Sentinel: > any real gap.
        for index, pfnset in enumerate(pfnset_list):
          time_difference = pfnset.time - dump.time
          if time_difference >= 3.0:
            break
          elif ((time_difference < 0.0 and pfnset.reason != 'Exiting') or
                0.0 <= time_difference < 3.0):
            closest_pfnset_index = index
            closest_pfnset_difference = time_difference
          elif time_difference < 0.0 and pfnset.reason == 'Exiting':
            closest_pfnset_index = None
            break
        # Note: index 0 is a valid match; compare against None explicitly.
        if closest_pfnset_index is not None:
          for pfn, count in pfnset_list[closest_pfnset_index].iter_pfn:
            all_pfn_dict[pfn] = all_pfn_dict.get(pfn, 0) + count
          LOGGER.info('      %s (time difference = %f)' %
                      (pfnset_list[closest_pfnset_index].path,
                       closest_pfnset_difference))
        else:
          LOGGER.info('      (no match with pid:%d)' % pid)

    sizes = dict((c, 0) for c in policy.components)

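    # Attribute malloc-backed memory via stacktrace buckets, then walk the
    # memory maps for mmap'ed regions.  _accumulate_maps also returns
    # subtotals used below to cross-check the dump's GLOBAL_STATS.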
    PolicyCommands._accumulate_malloc(dump, policy, bucket_set, sizes)
    verify_global_stats = PolicyCommands._accumulate_maps(
        dump, all_pfn_dict, policy, bucket_set, sizes)

    # TODO(dmikurube): Remove the verifying code when GLOBAL_STATS is removed.
    # http://crbug.com/245603.
    for verify_key, verify_value in verify_global_stats.iteritems():
      dump_value = dump.global_stat('%s_committed' % verify_key)
      if dump_value != verify_value:
        LOGGER.warn('%25s: %12d != %d (%d)' % (
            verify_key, dump_value, verify_value, dump_value - verify_value))

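    # Derive the bookkeeping components from GLOBAL_STATS: '*-no-log' is
    # committed memory that no logged component claimed, and 'tc-unused' is
    # tcmalloc heap that is committed but not allocated to the application.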
    sizes['mmap-no-log'] = (
        dump.global_stat('profiled-mmap_committed') -
        sizes['mmap-total-log'])
    sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed')
    sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual')

    sizes['tc-no-log'] = (
        dump.global_stat('profiled-malloc_committed') -
        sizes['tc-total-log'])
    sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed')
    sizes['tc-unused'] = (
        sizes['mmap-tcmalloc'] -
        dump.global_stat('profiled-malloc_committed'))
    if sizes['tc-unused'] < 0:
      LOGGER.warn('    Assuming tc-unused=0 as it is negative: %d (bytes)' %
                  sizes['tc-unused'])
      sizes['tc-unused'] = 0
    sizes['tc-total'] = sizes['mmap-tcmalloc']

    # TODO(dmikurube): global_stat will be deprecated.
    # See http://crbug.com/245603.
    for key, value in {
        'total': 'total_committed',
        'filemapped': 'file_committed',
        'absent': 'absent_committed',
        'file-exec': 'file-exec_committed',
        'file-nonexec': 'file-nonexec_committed',
        'anonymous': 'anonymous_committed',
        'stack': 'stack_committed',
        'other': 'other_committed',
        'unhooked-absent': 'nonprofiled-absent_committed',
        'total-vm': 'total_virtual',
        'filemapped-vm': 'file_virtual',
        'anonymous-vm': 'anonymous_virtual',
        'other-vm': 'other_virtual'}.iteritems():
      if key in sizes:
        sizes[key] = dump.global_stat(value)

    if 'mustbezero' in sizes:
      removed_list = (
          'profiled-mmap_committed',
          'nonprofiled-absent_committed',
          'nonprofiled-anonymous_committed',
          'nonprofiled-file-exec_committed',
          'nonprofiled-file-nonexec_committed',
          'nonprofiled-stack_committed',
          'nonprofiled-other_committed')
      sizes['mustbezero'] = (
          dump.global_stat('total_committed') -
          sum(dump.global_stat(removed) for removed in removed_list))
    if 'total-exclude-profiler' in sizes:
      sizes['total-exclude-profiler'] = (
          dump.global_stat('total_committed') -
          (sizes['mmap-profiler'] + sizes['mmap-type-profiler']))
    if 'hour' in sizes:
      sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0
    if 'minute' in sizes:
      sizes['minute'] = (dump.time - first_dump_time) / 60.0
    if 'second' in sizes:
      if self._timestamp:
        sizes['second'] = datetime.datetime.fromtimestamp(dump.time).isoformat()
      else:
        sizes['second'] = dump.time - first_dump_time

    return sizes

  @staticmethod
  def _accumulate_malloc(dump, policy, bucket_set, sizes):
    """Accumulates malloc'ed sizes from stacktraces into |sizes|."""
    for bucket_id, _, committed, _, _ in dump.iter_stacktrace:
      bucket = bucket_set.get(bucket_id)
      if not bucket or bucket.allocator_type == 'malloc':
        component_match = policy.find_malloc(bucket)
      elif bucket.allocator_type == 'mmap':
        continue
      else:
        assert False  # Unknown allocator type.
      sizes[component_match] += committed

      assert not component_match.startswith('mmap-')
      if component_match.startswith('tc-'):
        sizes['tc-total-log'] += committed
      else:
        sizes['other-total-log'] += committed

  @staticmethod
  def _accumulate_maps(dump, pfn_dict, policy, bucket_set, sizes):
    """Accumulates mmap'ed sizes from memory maps into |sizes|.

    Returns:
        A dict of per-category subtotals to verify against GLOBAL_STATS.
    """
    # TODO(dmikurube): Remove the dict when GLOBAL_STATS is removed.
    # http://crbug.com/245603.
    global_stats = {
        'total': 0,
        'file-exec': 0,
        'file-nonexec': 0,
        'anonymous': 0,
        'stack': 0,
        'other': 0,
        'nonprofiled-file-exec': 0,
        'nonprofiled-file-nonexec': 0,
        'nonprofiled-anonymous': 0,
        'nonprofiled-stack': 0,
        'nonprofiled-other': 0,
        'profiled-mmap': 0,
        }

    for _, value in dump.iter_map:
      # TODO(dmikurube): Remove the subtotal code when GLOBAL_STATS is removed.
      # It's temporary verification code for transition described in
      # http://crbug.com/245603.
      committed = 0
      if 'committed' in value[1]:
        committed = value[1]['committed']
      global_stats['total'] += committed
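      # Classify the mapping by its VMA: file-backed mappings split by the
      # executable bit, '[stack]' is the stack, and a nameless VMA counts as
      # anonymous memory; everything else falls into 'other'.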
      key = 'other'
      name = value[1]['vma']['name']
      if name.startswith('/'):
        if value[1]['vma']['executable'] == 'x':
          key = 'file-exec'
        else:
          key = 'file-nonexec'
      elif name == '[stack]':
        key = 'stack'
      elif name == '':
        key = 'anonymous'
      global_stats[key] += committed
      if value[0] == 'unhooked':
        global_stats['nonprofiled-' + key] += committed
      if value[0] == 'hooked':
        global_stats['profiled-mmap'] += committed

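      # Attribute the region to a component.  Unhooked regions are matched by
      # their attributes alone; hooked regions are matched through their
      # allocation stacktrace bucket.  When page frame data is available, the
      # sizes are accumulated per page so that pages shared with the processes
      # in |pfn_dict| can be accounted for.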
      if value[0] == 'unhooked':
        if pfn_dict and dump.pageframe_length:
          for pageframe in value[1]['pageframe']:
            component_match = policy.find_unhooked(value, pageframe, pfn_dict)
            sizes[component_match] += pageframe.size
        else:
          component_match = policy.find_unhooked(value)
          sizes[component_match] += int(value[1]['committed'])
      elif value[0] == 'hooked':
        if pfn_dict and dump.pageframe_length:
          for pageframe in value[1]['pageframe']:
            component_match, _ = policy.find_mmap(
                value, bucket_set, pageframe, pfn_dict)
            sizes[component_match] += pageframe.size
            assert not component_match.startswith('tc-')
            if component_match.startswith('mmap-'):
              sizes['mmap-total-log'] += pageframe.size
            else:
              sizes['other-total-log'] += pageframe.size
        else:
          component_match, _ = policy.find_mmap(value, bucket_set)
          sizes[component_match] += int(value[1]['committed'])
          if component_match.startswith('mmap-'):
            sizes['mmap-total-log'] += int(value[1]['committed'])
          else:
            sizes['other-total-log'] += int(value[1]['committed'])
      else:
        LOGGER.error('Unrecognized mapping status: %s' % value[0])

    return global_stats


class CSVCommand(PolicyCommands):
  def __init__(self):
    super(CSVCommand, self).__init__('csv')

  def do(self, sys_argv):
    policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
    return self._output(
        policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)

  def _output(self, policy_set, dumps, pfn_counts_dict, bucket_set, out):
    max_components = 0
    for label in policy_set:
      max_components = max(max_components, len(policy_set[label].components))

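    # Every row is padded with trailing commas up to |max_components| columns
    # so that the sections for all policies align in a single CSV table.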
    for label in sorted(policy_set):
      components = policy_set[label].components
      if len(policy_set) > 1:
        out.write('%s%s\n' % (label, ',' * (max_components - 1)))
      out.write('%s%s\n' % (
          ','.join(components), ',' * (max_components - len(components))))

      LOGGER.info('Applying a policy %s to...' % label)
      for index, dump in enumerate(dumps):
        if index == 0:
          first_dump_time = dump.time
        component_sizes = self._apply_policy(
            dump, pfn_counts_dict, policy_set[label], bucket_set,
            first_dump_time)
        s = []
        for c in components:
          if c in ('hour', 'minute', 'second'):
            if isinstance(component_sizes[c], str):
              s.append('%s' % component_sizes[c])
            else:
              s.append('%05.5f' % component_sizes[c])
          else:
            s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
        out.write('%s%s\n' % (
            ','.join(s), ',' * (max_components - len(components))))

      bucket_set.clear_component_cache()

    return 0


class JSONCommand(PolicyCommands):
  def __init__(self):
    super(JSONCommand, self).__init__('json')

  def do(self, sys_argv):
    policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
    return self._output(
        policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)

  def _output(self, policy_set, dumps, pfn_counts_dict, bucket_set, out):
    json_base = {
      'version': 'JSON_DEEP_2',
      'policies': {},
    }

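    # Output schema: one entry per policy label, each carrying the ordered
    # component list ('legends') and one 'snapshots' dict per dump, keyed by
    # component name plus 'dump_path' and 'dump_time'.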
    for label in sorted(policy_set):
      json_base['policies'][label] = {
        'legends': policy_set[label].components,
        'snapshots': [],
      }

      LOGGER.info('Applying a policy %s to...' % label)
      for index, dump in enumerate(dumps):
        if index == 0:
          first_dump_time = dump.time
        component_sizes = self._apply_policy(
            dump, pfn_counts_dict, policy_set[label], bucket_set,
            first_dump_time)
        component_sizes['dump_path'] = dump.path
        component_sizes['dump_time'] = datetime.datetime.fromtimestamp(
            dump.time).strftime('%Y-%m-%d %H:%M:%S')
        json_base['policies'][label]['snapshots'].append(component_sizes)

      bucket_set.clear_component_cache()

    json.dump(json_base, out, indent=2, sort_keys=True)

    return 0


class ListCommand(PolicyCommands):
  def __init__(self):
    super(ListCommand, self).__init__('list')

  def do(self, sys_argv):
    policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
    return self._output(
        policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)

  def _output(self, policy_set, dumps, pfn_counts_dict, bucket_set, out):
    for label in sorted(policy_set):
      LOGGER.info('Applying a policy %s to...' % label)
      for dump in dumps:
        # Pass dump.time as first_dump_time: each dump is its own reference
        # point, so hour/minute/second read as zero elapsed time.
        component_sizes = self._apply_policy(
            dump, pfn_counts_dict, policy_set[label], bucket_set, dump.time)
        out.write('%s for %s:\n' % (label, dump.path))
        for c in policy_set[label].components:
          if c in ['hour', 'minute', 'second']:
            out.write('%40s %12.3f\n' % (c, component_sizes[c]))
          else:
            out.write('%40s %12d\n' % (c, component_sizes[c]))

      bucket_set.clear_component_cache()

    return 0