1# Copyright 2013 The Chromium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5import json 6 7from telemetry.core.heap import live_heap_object 8from telemetry.core.heap import retaining_edge 9 10 11class ChromeJsHeapSnapshotParser(object): 12 """ Parser for the heap snapshot. 13 14 The heap snapshot JSON format is defined by HeapSnapshotJSONSerializer in V8. 15 16 The snapshot contains a list of integers describing nodes (types, names, etc.) 17 and a list of integers describing edges (types, the node the edge points to, 18 etc.) and a string table. All strings are expressed as indices to the string 19 table. 20 21 In addition, the snapshot contains meta information describing the data fields 22 for nodes and the data fields for edges. 23 24 Attributes: 25 _node_dict: {int -> LiveHeapObject}, maps integer ids to LiveHeapObject 26 objects. 27 _node_list: [int], the raw node data of the heap snapshot. 28 _edge_list: [int], the raw edge data of the heap snapshot. 29 _node_types: [str], the possible node types in the heap snapshot. 30 _edge_types: [str], the possible edge types in the heap snapshot. 31 _node_fields: [str], the fields present in the heap snapshot for each node. 32 _edge_fields: [str], the fields present in the heap snapshot for each node. 33 _node_type_ix: int, index of the node type field. 34 _node_name_ix: int, index of the node name field. 35 _node_id_ix: int, index of the node id field. 36 _node_edge_count_ix: int, index of the node edge count field. 37 _node_field_count: int, number of node fields. 38 _edge_type_ix: int, index of the edge type field. 39 _edge_name_or_ix_ix: int, index of the "edge name or index" field. 40 _edge_to_node_ix: int, index of the "to node for an edge" field. 41 _edge_field_count: int, number of edge fields. 42 """ 43 44 def __init__(self, raw_data): 45 heap = json.loads(raw_data) 46 self._node_dict = {} 47 48 # Read the snapshot components (nodes, edges, strings, metadata). 49 self._node_list = heap['nodes'] 50 self._edge_list = heap['edges'] 51 self._strings = heap['strings'] 52 53 self._node_types = heap['snapshot']['meta']['node_types'][0] 54 self._edge_types = heap['snapshot']['meta']['edge_types'][0] 55 node_fields = heap['snapshot']['meta']['node_fields'] 56 edge_fields = heap['snapshot']['meta']['edge_fields'] 57 58 # Find the indices of the required node and edge fields based on the 59 # metadata. 60 self._node_type_ix = node_fields.index('type') 61 self._node_name_ix = node_fields.index('name') 62 self._node_id_ix = node_fields.index('id') 63 self._node_edge_count_ix = node_fields.index('edge_count') 64 self._node_field_count = len(node_fields) 65 66 self._edge_type_ix = edge_fields.index('type') 67 self._edge_name_or_ix_ix = edge_fields.index('name_or_index') 68 self._edge_to_node_ix = edge_fields.index('to_node') 69 self._edge_field_count = len(edge_fields) 70 71 self._ParseSnapshot() 72 73 @staticmethod 74 def CanImport(raw_data): 75 heap = json.loads(raw_data) 76 if ('nodes' not in heap or 'edges' not in heap or 'strings' not in heap or 77 'snapshot' not in heap or 'meta' not in heap['snapshot']): 78 return False 79 meta = heap['snapshot']['meta'] 80 if ('node_types' not in meta or 'edge_types' not in meta or 81 'node_fields' not in meta or 'edge_fields' not in meta): 82 return False 83 node_fields = meta['node_fields'] 84 edge_fields = meta['edge_fields'] 85 if ('type' not in node_fields or 'name' not in node_fields or 86 'id' not in node_fields or 'edge_count' not in node_fields): 87 return False 88 if ('type' not in edge_fields or 'name_or_index' not in edge_fields or 89 'to_node' not in edge_fields): 90 return False 91 return True 92 93 def GetAllLiveHeapObjects(self): 94 return self._node_dict.values() 95 96 @staticmethod 97 def LiveHeapObjectToJavaScript(heap_object): 98 return heap_object.name or str(heap_object) 99 100 @staticmethod 101 def RetainingEdgeToJavaScript(edge): 102 if edge.type_string == 'property': 103 return '.' + edge.name_string 104 if edge.type_string == 'element': 105 return '[' + edge.name_string + ']' 106 return str(edge) 107 108 def _ParseSnapshot(self): 109 """Parses the stored JSON snapshot data. 110 111 Fills in self._node_dict with LiveHeapObject objects constructed based on 112 the heap snapshot. The LiveHeapObject objects contain the associated 113 RetainingEdge objects. 114 """ 115 edge_start_ix = 0 116 for ix in xrange(0, len(self._node_list), self._node_field_count): 117 edge_start_ix = self._ReadNodeFromIndex(ix, edge_start_ix) 118 119 # Add pointers to the endpoints to the edges, and associate the edges with 120 # the "to" nodes. 121 for node_id in self._node_dict: 122 n = self._node_dict[node_id] 123 for e in n.edges_from: 124 self._node_dict[e.to_object_id].AddEdgeTo(e) 125 e.SetFromObject(n) 126 e.SetToObject(self._node_dict[e.to_object_id]) 127 128 def _ReadNodeFromIndex(self, ix, edges_start): 129 """Reads the data for a node from the heap snapshot. 130 131 If the index contains an interesting node, constructs a Node object and adds 132 it to self._node_dict. 133 134 Args: 135 ix: int, index into the self._node_list array. 136 edges_start: int, the index of the edge array where the edges for the node 137 start. 138 Returns: 139 int, the edge start index for the next node. 140 141 Raises: 142 Exception: The node list of the snapshot is malformed. 143 """ 144 if ix + self._node_field_count > len(self._node_list): 145 raise Exception('Snapshot node list too short') 146 147 type_ix = self._node_list[ix + self._node_type_ix] 148 type_string = self._node_types[int(type_ix)] 149 150 # edges_end is noninclusive (the index of the first edge that is not part of 151 # this node). 152 edge_count = self._node_list[ix + self._node_edge_count_ix] 153 edges_end = edges_start + edge_count * self._edge_field_count 154 155 if ChromeJsHeapSnapshotParser._IsNodeTypeUninteresting(type_string): 156 return edges_end 157 158 name_ix = self._node_list[ix + self._node_name_ix] 159 node_id = self._node_list[ix + self._node_id_ix] 160 161 def ConstructorName(type_string, node_name_ix): 162 if type_string == 'object': 163 return self._strings[int(node_name_ix)] 164 return '(%s)' % type_string 165 166 ctor_name = ConstructorName(type_string, name_ix) 167 n = live_heap_object.LiveHeapObject(node_id, type_string, ctor_name) 168 if type_string == 'string': 169 n.string = self._strings[int(name_ix)] 170 171 for edge_ix in xrange(edges_start, edges_end, self._edge_field_count): 172 edge = self._ReadEdgeFromIndex(node_id, edge_ix) 173 if edge: 174 # The edge will be associated with the other endpoint when all the data 175 # has been read. 176 n.AddEdgeFrom(edge) 177 178 self._node_dict[node_id] = n 179 return edges_end 180 181 @staticmethod 182 def _IsNodeTypeUninteresting(type_string): 183 """Helper function for filtering out nodes from the heap snapshot. 184 185 Args: 186 type_string: str, type of the node. 187 Returns: 188 bool, True if the node is of an uninteresting type and shouldn't be 189 included in the heap snapshot analysis. 190 """ 191 uninteresting_types = ('hidden', 'code', 'number', 'native', 'synthetic') 192 return type_string in uninteresting_types 193 194 @staticmethod 195 def _IsEdgeTypeUninteresting(edge_type_string): 196 """Helper function for filtering out edges from the heap snapshot. 197 198 Args: 199 edge_type_string: str, type of the edge. 200 Returns: 201 bool, True if the edge is of an uninteresting type and shouldn't be 202 included in the heap snapshot analysis. 203 """ 204 uninteresting_types = ('weak', 'hidden', 'internal') 205 return edge_type_string in uninteresting_types 206 207 def _ReadEdgeFromIndex(self, node_id, edge_ix): 208 """Reads the data for an edge from the heap snapshot. 209 210 Args: 211 node_id: int, id of the node which is the starting point of the edge. 212 edge_ix: int, index into the self._edge_list array. 213 Returns: 214 Edge, if the index contains an interesting edge, otherwise None. 215 Raises: 216 Exception: The node list of the snapshot is malformed. 217 """ 218 if edge_ix + self._edge_field_count > len(self._edge_list): 219 raise Exception('Snapshot edge list too short') 220 221 edge_type_ix = self._edge_list[edge_ix + self._edge_type_ix] 222 edge_type_string = self._edge_types[int(edge_type_ix)] 223 224 if ChromeJsHeapSnapshotParser._IsEdgeTypeUninteresting(edge_type_string): 225 return None 226 227 child_name_or_ix = self._edge_list[edge_ix + self._edge_name_or_ix_ix] 228 child_node_ix = self._edge_list[edge_ix + self._edge_to_node_ix] 229 230 # The child_node_ix is an index into the node list. Read the actual 231 # node information. 232 child_node_type_ix = self._node_list[child_node_ix + self._node_type_ix] 233 child_node_type_string = self._node_types[int(child_node_type_ix)] 234 child_node_id = self._node_list[child_node_ix + self._node_id_ix] 235 236 if ChromeJsHeapSnapshotParser._IsNodeTypeUninteresting( 237 child_node_type_string): 238 return None 239 240 child_name_string = '' 241 # For element nodes, the child has no name (only an index). 242 if (edge_type_string == 'element' or 243 int(child_name_or_ix) >= len(self._strings)): 244 child_name_string = str(child_name_or_ix) 245 else: 246 child_name_string = self._strings[int(child_name_or_ix)] 247 return retaining_edge.RetainingEdge(node_id, child_node_id, 248 edge_type_string, child_name_string) 249