1# Copyright 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import json
6
7from telemetry.core.heap import live_heap_object
8from telemetry.core.heap import retaining_edge
9
10
11class ChromeJsHeapSnapshotParser(object):
12  """ Parser for the heap snapshot.
13
14  The heap snapshot JSON format is defined by HeapSnapshotJSONSerializer in V8.
15
16  The snapshot contains a list of integers describing nodes (types, names, etc.)
17  and a list of integers describing edges (types, the node the edge points to,
18  etc.) and a string table. All strings are expressed as indices to the string
19  table.
20
21  In addition, the snapshot contains meta information describing the data fields
22  for nodes and the data fields for edges.
23
24  Attributes:
25    _node_dict: {int -> LiveHeapObject}, maps integer ids to LiveHeapObject
26        objects.
27    _node_list: [int], the raw node data of the heap snapshot.
28    _edge_list: [int], the raw edge data of the heap snapshot.
29    _node_types: [str], the possible node types in the heap snapshot.
30    _edge_types: [str], the possible edge types in the heap snapshot.
31    _node_fields: [str], the fields present in the heap snapshot for each node.
32    _edge_fields: [str], the fields present in the heap snapshot for each node.
33    _node_type_ix: int, index of the node type field.
34    _node_name_ix: int, index of the node name field.
35    _node_id_ix: int, index of the node id field.
36    _node_edge_count_ix: int, index of the node edge count field.
37    _node_field_count: int, number of node fields.
38    _edge_type_ix: int, index of the edge type field.
39    _edge_name_or_ix_ix: int, index of the "edge name or index" field.
40    _edge_to_node_ix: int, index of the "to node for an edge" field.
41    _edge_field_count: int, number of edge fields.
42  """
43
44  def __init__(self, raw_data):
45    heap = json.loads(raw_data)
46    self._node_dict = {}
47
48    # Read the snapshot components (nodes, edges, strings, metadata).
49    self._node_list = heap['nodes']
50    self._edge_list = heap['edges']
51    self._strings = heap['strings']
52
53    self._node_types = heap['snapshot']['meta']['node_types'][0]
54    self._edge_types = heap['snapshot']['meta']['edge_types'][0]
55    node_fields = heap['snapshot']['meta']['node_fields']
56    edge_fields = heap['snapshot']['meta']['edge_fields']
57
58    # Find the indices of the required node and edge fields based on the
59    # metadata.
60    self._node_type_ix = node_fields.index('type')
61    self._node_name_ix = node_fields.index('name')
62    self._node_id_ix = node_fields.index('id')
63    self._node_edge_count_ix = node_fields.index('edge_count')
64    self._node_field_count = len(node_fields)
65
66    self._edge_type_ix = edge_fields.index('type')
67    self._edge_name_or_ix_ix = edge_fields.index('name_or_index')
68    self._edge_to_node_ix = edge_fields.index('to_node')
69    self._edge_field_count = len(edge_fields)
70
71    self._ParseSnapshot()
72
73  @staticmethod
74  def CanImport(raw_data):
75    heap = json.loads(raw_data)
76    if ('nodes' not in heap or 'edges' not in heap or 'strings' not in heap or
77        'snapshot' not in heap or 'meta' not in heap['snapshot']):
78      return False
79    meta = heap['snapshot']['meta']
80    if ('node_types' not in meta or 'edge_types' not in meta or
81        'node_fields' not in meta or 'edge_fields' not in meta):
82      return False
83    node_fields = meta['node_fields']
84    edge_fields = meta['edge_fields']
85    if ('type' not in node_fields or 'name' not in node_fields or
86        'id' not in node_fields or 'edge_count' not in node_fields):
87      return False
88    if ('type' not in edge_fields or 'name_or_index' not in edge_fields or
89        'to_node' not in edge_fields):
90      return False
91    return True
92
93  def GetAllLiveHeapObjects(self):
94    return self._node_dict.values()
95
96  @staticmethod
97  def LiveHeapObjectToJavaScript(heap_object):
98    return heap_object.name or str(heap_object)
99
100  @staticmethod
101  def RetainingEdgeToJavaScript(edge):
102    if edge.type_string == 'property':
103      return '.' + edge.name_string
104    if edge.type_string == 'element':
105      return '[' + edge.name_string + ']'
106    return str(edge)
107
108  def _ParseSnapshot(self):
109    """Parses the stored JSON snapshot data.
110
111    Fills in self._node_dict with LiveHeapObject objects constructed based on
112    the heap snapshot. The LiveHeapObject objects contain the associated
113    RetainingEdge objects.
114    """
115    edge_start_ix = 0
116    for ix in xrange(0, len(self._node_list), self._node_field_count):
117      edge_start_ix = self._ReadNodeFromIndex(ix, edge_start_ix)
118
119    # Add pointers to the endpoints to the edges, and associate the edges with
120    # the "to" nodes.
121    for node_id in self._node_dict:
122      n = self._node_dict[node_id]
123      for e in n.edges_from:
124        self._node_dict[e.to_object_id].AddEdgeTo(e)
125        e.SetFromObject(n)
126        e.SetToObject(self._node_dict[e.to_object_id])
127
128  def _ReadNodeFromIndex(self, ix, edges_start):
129    """Reads the data for a node from the heap snapshot.
130
131    If the index contains an interesting node, constructs a Node object and adds
132    it to self._node_dict.
133
134    Args:
135      ix: int, index into the self._node_list array.
136      edges_start: int, the index of the edge array where the edges for the node
137          start.
138    Returns:
139      int, the edge start index for the next node.
140
141    Raises:
142      Exception: The node list of the snapshot is malformed.
143    """
144    if ix + self._node_field_count > len(self._node_list):
145      raise Exception('Snapshot node list too short')
146
147    type_ix = self._node_list[ix + self._node_type_ix]
148    type_string = self._node_types[int(type_ix)]
149
150    # edges_end is noninclusive (the index of the first edge that is not part of
151    # this node).
152    edge_count = self._node_list[ix + self._node_edge_count_ix]
153    edges_end = edges_start + edge_count * self._edge_field_count
154
155    if ChromeJsHeapSnapshotParser._IsNodeTypeUninteresting(type_string):
156      return edges_end
157
158    name_ix = self._node_list[ix + self._node_name_ix]
159    node_id = self._node_list[ix + self._node_id_ix]
160
161    def ConstructorName(type_string, node_name_ix):
162      if type_string == 'object':
163        return self._strings[int(node_name_ix)]
164      return '(%s)' % type_string
165
166    ctor_name = ConstructorName(type_string, name_ix)
167    n = live_heap_object.LiveHeapObject(node_id, type_string, ctor_name)
168    if type_string == 'string':
169      n.string = self._strings[int(name_ix)]
170
171    for edge_ix in xrange(edges_start, edges_end, self._edge_field_count):
172      edge = self._ReadEdgeFromIndex(node_id, edge_ix)
173      if edge:
174        # The edge will be associated with the other endpoint when all the data
175        # has been read.
176        n.AddEdgeFrom(edge)
177
178    self._node_dict[node_id] = n
179    return edges_end
180
181  @staticmethod
182  def _IsNodeTypeUninteresting(type_string):
183    """Helper function for filtering out nodes from the heap snapshot.
184
185    Args:
186      type_string: str, type of the node.
187    Returns:
188      bool, True if the node is of an uninteresting type and shouldn't be
189          included in the heap snapshot analysis.
190    """
191    uninteresting_types = ('hidden', 'code', 'number', 'native', 'synthetic')
192    return type_string in uninteresting_types
193
194  @staticmethod
195  def _IsEdgeTypeUninteresting(edge_type_string):
196    """Helper function for filtering out edges from the heap snapshot.
197
198    Args:
199      edge_type_string: str, type of the edge.
200    Returns:
201      bool, True if the edge is of an uninteresting type and shouldn't be
202          included in the heap snapshot analysis.
203    """
204    uninteresting_types = ('weak', 'hidden', 'internal')
205    return edge_type_string in uninteresting_types
206
207  def _ReadEdgeFromIndex(self, node_id, edge_ix):
208    """Reads the data for an edge from the heap snapshot.
209
210    Args:
211      node_id: int, id of the node which is the starting point of the edge.
212      edge_ix: int, index into the self._edge_list array.
213    Returns:
214      Edge, if the index contains an interesting edge, otherwise None.
215    Raises:
216      Exception: The node list of the snapshot is malformed.
217    """
218    if edge_ix + self._edge_field_count > len(self._edge_list):
219      raise Exception('Snapshot edge list too short')
220
221    edge_type_ix = self._edge_list[edge_ix + self._edge_type_ix]
222    edge_type_string = self._edge_types[int(edge_type_ix)]
223
224    if ChromeJsHeapSnapshotParser._IsEdgeTypeUninteresting(edge_type_string):
225      return None
226
227    child_name_or_ix = self._edge_list[edge_ix + self._edge_name_or_ix_ix]
228    child_node_ix = self._edge_list[edge_ix + self._edge_to_node_ix]
229
230    # The child_node_ix is an index into the node list. Read the actual
231    # node information.
232    child_node_type_ix = self._node_list[child_node_ix + self._node_type_ix]
233    child_node_type_string = self._node_types[int(child_node_type_ix)]
234    child_node_id = self._node_list[child_node_ix + self._node_id_ix]
235
236    if ChromeJsHeapSnapshotParser._IsNodeTypeUninteresting(
237        child_node_type_string):
238      return None
239
240    child_name_string = ''
241    # For element nodes, the child has no name (only an index).
242    if (edge_type_string == 'element' or
243        int(child_name_or_ix) >= len(self._strings)):
244      child_name_string = str(child_name_or_ix)
245    else:
246      child_name_string = self._strings[int(child_name_or_ix)]
247    return retaining_edge.RetainingEdge(node_id, child_node_id,
248                                        edge_type_string, child_name_string)
249