1# Copyright 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import cStringIO
6import json
7import logging
8import os
9import re
10
11from lib.ordered_dict import OrderedDict
12
13
# Logger named 'dmprof', used by the classes in this file.
LOGGER = logging.getLogger('dmprof')

# The parent of the directory that contains this file. The bundled sorter and
# template definitions are located relative to it.
BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Sorter definition files that SorterSet loads when it is not given an
# explicit default list. A generator expression (not a list comprehension) is
# used so that the loop variable does not become a module attribute under
# Python 2.
DEFAULT_SORTERS = list(
    os.path.join(BASE_PATH, 'sorters', sorter_filename)
    for sorter_filename in (
        'malloc.browser-module.json',
        'malloc.renderer-module.json',
        'malloc.type.json',
        'malloc.WebCore.json',
        'vm.Android-specific.json',
        'vm.base.json',
        'vm.GPU.json',
        'vm.sharing.json',
        'vm.Skia.json',
        'vm.V8.json',
    ))

# Template definition file that SorterSet loads.
DEFAULT_TEMPLATES = os.path.join(BASE_PATH, 'templates.json')
32
33
class Unit(object):
  """The smallest piece of memory usage that dmprof categorizes.

  Subclasses specialize it for different spaces, such as the whole virtual
  memory or the malloc arena. dmprof calls such spaces "worlds" (for example,
  the "vm" world and the "malloc" world).
  """
  def __init__(self, unit_id, size):
    self._unit_id, self._size = unit_id, size

  @property
  def unit_id(self):
    """The identifier passed to the constructor."""
    return self._unit_id

  @property
  def size(self):
    """The size passed to the constructor."""
    return self._size
52
53
class VMUnit(Unit):
  """A Unit that describes one memory region on virtual memory.

  The committed size is what the base class stores as the unit's size.
  """
  def __init__(self, unit_id, committed, reserved, mmap, region,
               pageframe=None, group_pfn_counts=None):
    # |committed| becomes the generic size kept by Unit.
    super(VMUnit, self).__init__(unit_id, committed)
    self._reserved, self._mmap, self._region = reserved, mmap, region
    self._pageframe, self._group_pfn_counts = pageframe, group_pfn_counts

  @property
  def committed(self):
    """The committed size, i.e. the size stored by the base class."""
    return self._size

  @property
  def reserved(self):
    """The reserved size passed to the constructor."""
    return self._reserved

  @property
  def mmap(self):
    """The mmap flag passed to the constructor."""
    return self._mmap

  @property
  def region(self):
    """The region passed to the constructor."""
    return self._region

  @property
  def pageframe(self):
    """The pageframe passed to the constructor (None by default)."""
    return self._pageframe

  @property
  def group_pfn_counts(self):
    """The group_pfn_counts passed to the constructor (None by default)."""
    return self._group_pfn_counts
88
89
class MMapUnit(VMUnit):
  """A VMUnit for an mmap'ed region; its mmap flag is always True."""
  def __init__(self, unit_id, committed, reserved, region, bucket_set,
               pageframe=None, group_pfn_counts=None):
    is_mmap = True
    super(MMapUnit, self).__init__(
        unit_id, committed, reserved, is_mmap, region,
        pageframe, group_pfn_counts)
    self._bucket_set = bucket_set

  def __repr__(self):
    """Shows the unit as the string form of its region."""
    return str(self.region)

  @property
  def bucket_set(self):
    """The bucket set passed to the constructor."""
    return self._bucket_set
104
105
class UnhookedUnit(VMUnit):
  """A VMUnit for a non-mmap'ed region; its mmap flag is always False."""
  def __init__(self, unit_id, committed, reserved, region,
               pageframe=None, group_pfn_counts=None):
    is_mmap = False
    super(UnhookedUnit, self).__init__(
        unit_id, committed, reserved, is_mmap, region,
        pageframe, group_pfn_counts)

  def __repr__(self):
    """Shows the unit as the string form of its region."""
    return str(self.region)
115
116
class MallocUnit(Unit):
  """A Unit for a malloc'ed memory block, tied to one bucket."""
  def __init__(self, unit_id, size, alloc_count, free_count, bucket):
    super(MallocUnit, self).__init__(unit_id, size)
    self._bucket = bucket
    self._alloc_count, self._free_count = alloc_count, free_count

  def __repr__(self):
    """Shows the unit as the string form of its bucket."""
    return str(self.bucket)

  @property
  def bucket(self):
    """The bucket passed to the constructor."""
    return self._bucket

  @property
  def alloc_count(self):
    """The allocation count passed to the constructor."""
    return self._alloc_count

  @property
  def free_count(self):
    """The free count passed to the constructor."""
    return self._free_count
139
140
class UnitSet(object):
  """A collection of Units keyed by unit id and iterated in id order."""
  def __init__(self, world):
    self._world = world
    self._units = {}

  def __repr__(self):
    return str(self._units)

  def __iter__(self):
    """Yields the stored units in ascending order of their unit ids."""
    for key in sorted(self._units):
      yield self._units[key]

  def append(self, unit, overwrite=False):
    """Stores |unit| under its unit id.

    A duplicate id is reported through LOGGER unless |overwrite| is true. In
    either case the new unit replaces the one stored before.
    """
    duplicated = not overwrite and unit.unit_id in self._units
    if duplicated:
      LOGGER.error('The unit id=%s already exists.' % str(unit.unit_id))
    self._units[unit.unit_id] = unit
158
159
class AbstractRule(object):
  """Base class of the rules that units are matched against.

  The constructor reads three keys from |dct|: 'name' (required), 'hidden'
  (False when absent) and 'subs' (an empty list when absent).
  """
  def __init__(self, dct):
    self._name = dct['name']
    self._hidden = dct.get('hidden', False)
    self._subs = dct.get('subs', [])

  def match(self, unit):
    """Tells whether |unit| matches this rule. Subclasses must override."""
    raise NotImplementedError()

  @property
  def name(self):
    """The value of 'name' in the rule definition."""
    return self._name

  @property
  def hidden(self):
    """The value of 'hidden' in the rule definition, or False."""
    return self._hidden

  def iter_subs(self):
    """Yields the entries of 'subs' in their stored order."""
    for entry in self._subs:
      yield entry
181
182
class VMRule(AbstractRule):
  """Represents a Rule to match with virtual memory regions.

  Keys read from |dct| in addition to those read by AbstractRule:
    backtrace_function, backtrace_sourcefile: regular expressions tested
        against the symbolized_joined_stackfunction and
        symbolized_joined_stacksourcefile of an mmap'ed unit's bucket. They
        are not consulted for non-mmap'ed units.
    mmap: True restricts the rule to mmap'ed units, False to non-mmap'ed
        units. When it is absent (None), both kinds go through.
    sharedwith: a list (empty by default); see _match_region.
    mapped_pathname, mapped_permission: regular expressions tested against
        the 'name' of the unit's vma and against the concatenation of its
        'readable', 'writable', 'executable' and 'private' fields.

  Every regular expression is applied with match(), so it is anchored at the
  beginning of the tested string.
  """
  def __init__(self, dct):
    super(VMRule, self).__init__(dct)
    self._backtrace_function = self._compile_optional(
        dct.get('backtrace_function', None))
    self._backtrace_sourcefile = self._compile_optional(
        dct.get('backtrace_sourcefile', None))
    self._mmap = dct.get('mmap', None)
    self._sharedwith = dct.get('sharedwith', [])
    self._mapped_pathname = self._compile_optional(
        dct.get('mapped_pathname', None))
    self._mapped_permission = self._compile_optional(
        dct.get('mapped_permission', None))

  @staticmethod
  def _compile_optional(pattern):
    """Compiles |pattern|; a falsy (absent or empty) one is returned as is."""
    if pattern:
      return re.compile(pattern)
    return pattern

  def __repr__(self):
    """Returns '<name>: { <attributes> }' listing the conditions that are set.

    'mmap' is always listed. Every other condition appears only when it is
    set, including backtrace_sourcefile since match() depends on it.
    """
    result = cStringIO.StringIO()
    result.write('%s: ' % self._name)
    attributes = ['mmap: %s' % self._mmap]
    if self._backtrace_function:
      attributes.append('backtrace_function: "%s"' %
                        self._backtrace_function.pattern)
    if self._backtrace_sourcefile:
      attributes.append('backtrace_sourcefile: "%s"' %
                        self._backtrace_sourcefile.pattern)
    if self._sharedwith:
      attributes.append('sharedwith: "%s"' % self._sharedwith)
    if self._mapped_pathname:
      attributes.append('mapped_pathname: "%s"' % self._mapped_pathname.pattern)
    if self._mapped_permission:
      attributes.append('mapped_permission: "%s"' %
                        self._mapped_permission.pattern)
    result.write('{ %s }' % ', '.join(attributes))
    return result.getvalue()

  def match(self, unit):
    """Returns True if |unit| satisfies every condition of this rule.

    An mmap'ed unit (unit.mmap is true) must have a 'hooked' region whose
    bucket, looked up in unit.bucket_set, has the allocator type 'mmap'. Any
    other unit must have an 'unhooked' region. These expectations are checked
    with assertions.
    """
    if unit.mmap:
      assert unit.region[0] == 'hooked'
      bucket = unit.bucket_set.get(unit.region[1]['bucket_id'])
      assert bucket
      assert bucket.allocator_type == 'mmap'

      stackfunction = bucket.symbolized_joined_stackfunction
      stacksourcefile = bucket.symbolized_joined_stacksourcefile

      # Equality (not identity) is kept on purpose so that only an explicit
      # false value rejects the unit; (self._mmap == None) should go through.
      if self._mmap == False:
        return False
      if (self._backtrace_function and
          not self._backtrace_function.match(stackfunction)):
        return False
      if (self._backtrace_sourcefile and
          not self._backtrace_sourcefile.match(stacksourcefile)):
        return False
    else:
      assert unit.region[0] == 'unhooked'

      if self._mmap == True:  # (self._mmap == None) should go through.
        return False

    return self._match_region(unit)

  def _match_region(self, unit):
    """Checks the conditions shared by mmap'ed and non-mmap'ed units."""
    # The vma is looked up only when a condition needs it, so a region
    # without 'vma' is still accepted by rules that do not test it.
    if self._mapped_pathname or self._mapped_permission:
      vma = unit.region[1]['vma']
      if (self._mapped_pathname and
          not self._mapped_pathname.match(vma['name'])):
        return False
      if self._mapped_permission:
        permission = (vma['readable'] + vma['writable'] +
                      vma['executable'] + vma['private'])
        if not self._mapped_permission.match(permission):
          return False

    # TODO(dmikurube): Support shared memory.
    # Until then |sharedwith| is always None, so a rule with a non-empty
    # 'sharedwith' rejects every unit that has a pageframe unless None is
    # listed in it.
    sharedwith = None
    if (self._sharedwith and
        unit.pageframe and sharedwith not in self._sharedwith):
      return False

    return True
280
281
class MallocRule(AbstractRule):
  """Represents a Rule to match with malloc'ed blocks.

  Keys read from |dct| in addition to those read by AbstractRule:
    backtrace_function, backtrace_sourcefile: regular expressions tested
        against the symbolized_joined_stackfunction and
        symbolized_joined_stacksourcefile of the unit's bucket.
    typeinfo: a regular expression tested against the bucket's type
        information.

  Every regular expression is applied with match(), so it is anchored at the
  beginning of the tested string. A condition that is absent or empty is not
  checked.
  """
  def __init__(self, dct):
    super(MallocRule, self).__init__(dct)
    self._backtrace_function = dct.get('backtrace_function', None)
    if self._backtrace_function:
      self._backtrace_function = re.compile(self._backtrace_function)
    self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None)
    if self._backtrace_sourcefile:
      self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile)
    self._typeinfo = dct.get('typeinfo', None)
    if self._typeinfo:
      self._typeinfo = re.compile(self._typeinfo)

  def __repr__(self):
    """Returns '<name>: { <attributes> }' listing the conditions that are set.

    backtrace_sourcefile is listed as well since match() depends on it.
    """
    result = cStringIO.StringIO()
    result.write('%s: ' % self._name)
    attributes = []
    if self._backtrace_function:
      attributes.append('backtrace_function: "%s"' %
                        self._backtrace_function.pattern)
    if self._backtrace_sourcefile:
      attributes.append('backtrace_sourcefile: "%s"' %
                        self._backtrace_sourcefile.pattern)
    if self._typeinfo:
      attributes.append('typeinfo: "%s"' % self._typeinfo.pattern)
    result.write('{ %s }' % ', '.join(attributes))
    return result.getvalue()

  def match(self, unit):
    """Returns True if |unit| satisfies every condition, False otherwise.

    The bucket of |unit| must have the allocator type 'malloc'; this is
    checked with an assertion. The result is always a bool, as it is for
    VMRule.match.
    """
    assert unit.bucket.allocator_type == 'malloc'

    stackfunction = unit.bucket.symbolized_joined_stackfunction
    stacksourcefile = unit.bucket.symbolized_joined_stacksourcefile
    typeinfo = unit.bucket.symbolized_typeinfo
    # A symbolized typeinfo that starts with '0x' is replaced by the bucket's
    # typeinfo_name (presumably because it is an unresolved address).
    if typeinfo.startswith('0x'):
      typeinfo = unit.bucket.typeinfo_name

    if (self._backtrace_function and
        not self._backtrace_function.match(stackfunction)):
      return False
    if (self._backtrace_sourcefile and
        not self._backtrace_sourcefile.match(stacksourcefile)):
      return False
    if self._typeinfo and not self._typeinfo.match(typeinfo):
      return False
    return True
322
323
class AbstractSorter(object):
  """Base class of sorters, which classify Units with a list of Rules.

  The constructor requires 'version', 'world', 'name', 'order' and 'rules' in
  |dct|; 'root' is optional and defaults to False.
  """
  def __init__(self, dct):
    self._type = 'sorter'
    self._version = dct['version']
    self._world = dct['world']
    self._name = dct['name']
    self._root = dct.get('root', False)
    self._order = dct['order']

    # One rule object is built per rule definition, with the class chosen by
    # the world. Definitions of an unknown world are reported and dropped.
    self._rules = []
    for rule_dct in dct['rules']:
      world = dct['world']
      if world == 'vm':
        self._rules.append(VMRule(rule_dct))
        continue
      if world == 'malloc':
        self._rules.append(MallocRule(rule_dct))
        continue
      LOGGER.error('Unknown sorter world type')

  def __repr__(self):
    """Returns a multi-line description: name, world, order and rules."""
    result = cStringIO.StringIO()
    result.write('%s\n' % self._name)
    result.write('world=%s\n' % self._world)
    result.write('name=%s\n' % self._name)
    result.write('order=%s\n' % self._order)
    result.write('rules:\n')
    for rule in self._rules:
      result.write('  %s\n' % rule)
    return result.getvalue()

  @staticmethod
  def load(filename):
    """Builds a sorter from the JSON file |filename|.

    Returns a VMSorter or a MallocSorter depending on 'world' in the file.
    An unknown world is logged as an error and None is returned.
    """
    with open(filename) as sorter_f:
      sorter_dict = json.load(sorter_f, object_pairs_hook=OrderedDict)
    world = sorter_dict['world']
    if world == 'vm':
      return VMSorter(sorter_dict)
    if world == 'malloc':
      return MallocSorter(sorter_dict)
    LOGGER.error('Unknown sorter world type')
    return None

  @property
  def world(self):
    """The value of 'world' in the sorter definition."""
    return self._world

  @property
  def name(self):
    """The value of 'name' in the sorter definition."""
    return self._name

  @property
  def root(self):
    """The value of 'root' in the sorter definition, or False."""
    return self._root

  def iter_rule(self):
    """Yields the rules in their definition order."""
    for rule in self._rules:
      yield rule

  def find(self, unit):
    """Finds the rule for |unit|. Subclasses must override."""
    raise NotImplementedError()

  def find_rule(self, name):
    """Finds a rule whose name is |name|, or returns None."""
    for candidate in self._rules:
      if candidate.name == name:
        return candidate
    return None
391
392
class VMSorter(AbstractSorter):
  """A Sorter for memory regions on virtual memory (the 'vm' world)."""
  def __init__(self, dct):
    assert dct['world'] == 'vm'
    super(VMSorter, self).__init__(dct)

  def find(self, unit):
    """Returns the first rule that matches |unit|, or None if none does."""
    for candidate in self._rules:
      if not candidate.match(unit):
        continue
      return candidate
    return None
404
405
class MallocSorter(AbstractSorter):
  """A Sorter for malloc'ed blocks (the 'malloc' world)."""
  def __init__(self, dct):
    assert dct['world'] == 'malloc'
    super(MallocSorter, self).__init__(dct)

  def find(self, unit):
    """Returns the first rule that matches |unit|, or None.

    None is also returned when |unit| has no bucket. A bucket whose allocator
    type is not 'malloc' trips an assertion.
    """
    if not unit.bucket:
      return None
    assert unit.bucket.allocator_type == 'malloc'

    # TODO(dmikurube): Utilize component_cache again, or remove it.

    for candidate in self._rules:
      if not candidate.match(unit):
        continue
      return candidate
    return None
423
424
class SorterTemplates(object):
  """A thin wrapper around the dictionary of sorter templates."""
  def __init__(self, dct):
    self._dict = dct

  def as_dict(self):
    """Returns the wrapped dictionary itself (not a copy)."""
    return self._dict

  @staticmethod
  def load(filename):
    """Reads the JSON file |filename| and wraps its content.

    Objects in the file are loaded as OrderedDict.
    """
    with open(filename) as json_file:
      loaded = json.load(json_file, object_pairs_hook=OrderedDict)
    return SorterTemplates(loaded)
438
439
class SorterSet(object):
  """Represents an iterable set of Sorters grouped by world.

  Args:
    additional: A list of sorter definition files loaded after the default
        ones. A falsy value means no additional files.
    default: A list of sorter definition files loaded first. A falsy value
        selects DEFAULT_SORTERS.

  The templates are always loaded from DEFAULT_TEMPLATES.
  """
  def __init__(self, additional=None, default=None):
    if not additional:
      additional = []
    if not default:
      default = DEFAULT_SORTERS
    self._sorters = {}
    LOGGER.info('Loading sorters.')
    for filename in default + additional:
      LOGGER.info('  Loading a sorter "%s".', filename)
      sorter = AbstractSorter.load(filename)
      if sorter is None:
        # AbstractSorter.load returns None (after logging an error) for a
        # definition of an unknown world. Skip it instead of failing on
        # sorter.world below.
        continue
      self._sorters.setdefault(sorter.world, []).append(sorter)
    self._templates = SorterTemplates.load(DEFAULT_TEMPLATES)

  def __repr__(self):
    """Returns one '<world>: <sorter>' entry per loaded sorter."""
    result = cStringIO.StringIO()
    for world, sorters in self._sorters.items():
      for sorter in sorters:
        result.write('%s: %s\n' % (world, sorter))
    return result.getvalue()

  def __iter__(self):
    """Yields every loaded sorter, world by world."""
    for sorters in self._sorters.values():
      for sorter in sorters:
        yield sorter

  def iter_world(self, world):
    """Yields the sorters of |world| in loading order (none if unknown)."""
    for sorter in self._sorters.get(world, []):
      yield sorter

  @property
  def templates(self):
    """The SorterTemplates loaded by the constructor."""
    return self._templates
476