// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/trace_event/heap_profiler_heap_dump_writer.h"

#include <stdint.h>

#include <algorithm>
#include <iterator>
#include <tuple>
#include <utility>
#include <vector>

#include "base/format_macros.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/strings/stringprintf.h"
#include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"
#include "base/trace_event/heap_profiler_type_name_deduplicator.h"
#include "base/trace_event/memory_dump_session_state.h"
#include "base/trace_event/trace_config.h"
#include "base/trace_event/trace_event_argument.h"
#include "base/trace_event/trace_log.h"

// Most of what the |HeapDumpWriter| does is aggregating detailed information
// about the heap and deciding what to dump. The input to this process is a
// list of |AllocationContext|s and size pairs.
//
// The pairs are grouped into |Bucket|s. A bucket is a group of (context, size)
// pairs where the properties of the contexts share a prefix. (Type name is
// considered a list of length one here.) First all pairs are put into one
// bucket that represents the entire heap. Then this bucket is recursively
// broken down into smaller buckets. Each bucket keeps track of whether further
// breakdown is possible.
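//
// As an illustrative sketch (the frame and type names below are hypothetical,
// not taken from an actual trace), suppose the heap contains three
// allocations:
//
//   backtrace = [MessageLoop, TaskA], type = "int",    size = 64
//   backtrace = [MessageLoop, TaskA], type = "double", size = 32
//   backtrace = [MessageLoop, TaskB], type = "int",    size = 16
//
// The root bucket then has size 112 with |backtrace_cursor| 0. Breaking it
// down by backtrace yields a single "MessageLoop" subbucket (size 112, cursor
// 1); breaking that down again splits it into a "TaskA" subbucket (size 96)
// and a "TaskB" subbucket (size 16). Breaking the root bucket down by type
// name instead yields an "int" subbucket (80 bytes) and a "double" subbucket
// (32 bytes).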

namespace base {
namespace trace_event {
namespace internal {
namespace {

// Denotes a property of |AllocationContext| to break down by.
enum class BreakDownMode { kByBacktrace, kByTypeName };

// A group of bytes for which the context shares a prefix.
struct Bucket {
  Bucket()
      : size(0),
        count(0),
        backtrace_cursor(0),
        is_broken_down_by_type_name(false) {}

  std::vector<std::pair<const AllocationContext*, AllocationMetrics>>
      metrics_by_context;

  // The sum of the sizes of |metrics_by_context|.
  size_t size;

  // The sum of the number of allocations of |metrics_by_context|.
  size_t count;

  // The index of the stack frame that has not yet been broken down by. For all
  // elements in this bucket, the stack frames 0 up to (but not including) the
  // cursor must be equal.
  size_t backtrace_cursor;

  // When true, the type name for all elements in this bucket must be equal.
  bool is_broken_down_by_type_name;
};

// Comparison operator to order buckets by their size.
bool operator<(const Bucket& lhs, const Bucket& rhs) {
  return lhs.size < rhs.size;
}

// Groups the allocations in the bucket by |break_by|. The buckets in the
// returned list will have |backtrace_cursor| advanced or
// |is_broken_down_by_type_name| set depending on the property to group by.
std::vector<Bucket> GetSubbuckets(const Bucket& bucket,
                                  BreakDownMode break_by) {
  base::hash_map<const void*, Bucket> breakdown;

  if (break_by == BreakDownMode::kByBacktrace) {
    for (const auto& context_and_metrics : bucket.metrics_by_context) {
      const Backtrace& backtrace = context_and_metrics.first->backtrace;
      const StackFrame* begin = std::begin(backtrace.frames);
      const StackFrame* end = begin + backtrace.frame_count;
      const StackFrame* cursor = begin + bucket.backtrace_cursor;

      DCHECK_LE(cursor, end);

      if (cursor != end) {
        Bucket& subbucket = breakdown[cursor->value];
        subbucket.size += context_and_metrics.second.size;
        subbucket.count += context_and_metrics.second.count;
        subbucket.metrics_by_context.push_back(context_and_metrics);
        subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;
        subbucket.is_broken_down_by_type_name =
            bucket.is_broken_down_by_type_name;
        DCHECK_GT(subbucket.size, 0u);
        DCHECK_GT(subbucket.count, 0u);
      }
    }
  } else if (break_by == BreakDownMode::kByTypeName) {
    if (!bucket.is_broken_down_by_type_name) {
      for (const auto& context_and_metrics : bucket.metrics_by_context) {
        const AllocationContext* context = context_and_metrics.first;
        Bucket& subbucket = breakdown[context->type_name];
        subbucket.size += context_and_metrics.second.size;
        subbucket.count += context_and_metrics.second.count;
        subbucket.metrics_by_context.push_back(context_and_metrics);
        subbucket.backtrace_cursor = bucket.backtrace_cursor;
        subbucket.is_broken_down_by_type_name = true;
        DCHECK_GT(subbucket.size, 0u);
        DCHECK_GT(subbucket.count, 0u);
      }
    }
  }

  std::vector<Bucket> buckets;
  buckets.reserve(breakdown.size());
  for (const auto& key_bucket : breakdown)
    buckets.push_back(key_bucket.second);

  return buckets;
}

// Breaks down the bucket by |break_by|. Returns only buckets that contribute
// at least |min_size_bytes| to the total size. The long tail is omitted.
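// For example (the sizes here are illustrative only): with |min_size_bytes| ==
// 10 and subbuckets of sizes {97, 89, 83, 19, 13, 11, 7, 5, 2}, the six
// buckets of at least 10 bytes are returned and the 7-, 5- and 2-byte buckets
// are discarded as the tail.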
std::vector<Bucket> BreakDownBy(const Bucket& bucket,
                                BreakDownMode break_by,
                                size_t min_size_bytes) {
  std::vector<Bucket> buckets = GetSubbuckets(bucket, break_by);

  // Ensure that |buckets| is a max-heap (the data structure, not the memory
  // heap), so its front contains the largest bucket. Buckets should be
  // iterated ordered by size, but sorting the vector is overkill because the
  // long tail of small buckets will be discarded. By using a max-heap, the
  // optimal case where all but the first bucket are discarded is O(n). The
  // worst case where no bucket is discarded is doing a heap sort, which is
  // O(n log n).
  std::make_heap(buckets.begin(), buckets.end());

  // Keep including buckets as long as the largest remaining bucket contributes
  // at least |min_size_bytes|. The large buckets end up in [it, end());
  // [begin(), it) is the part that still contains the max-heap of small
  // buckets.
  std::vector<Bucket>::iterator it;
  for (it = buckets.end(); it != buckets.begin(); --it) {
    if (buckets.front().size < min_size_bytes)
      break;

    // Put the largest bucket in [begin, it) at |it - 1| and max-heapify
    // [begin, it - 1). This puts the next largest bucket at |buckets.front()|.
    std::pop_heap(buckets.begin(), it);
  }

  // At this point, |buckets| looks like this (numbers are bucket sizes):
  //
  // <-- max-heap of small buckets --->
  //                                     <-- large buckets by ascending size -->
  // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ]
  //   ^                                ^            ^
  //   |                                |            |
  //   begin()                          it           end()

  // Discard the long tail of buckets that contribute less than
  // |min_size_bytes| each.
  buckets.erase(buckets.begin(), it);

  return buckets;
}

}  // namespace

bool operator<(Entry lhs, Entry rhs) {
  // There is no need to compare |size|. If the backtrace and type name are
  // equal then the sizes must be equal as well.
  return std::tie(lhs.stack_frame_id, lhs.type_id) <
         std::tie(rhs.stack_frame_id, rhs.type_id);
}

HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
                               TypeNameDeduplicator* type_name_deduplicator,
                               uint32_t breakdown_threshold_bytes)
    : stack_frame_deduplicator_(stack_frame_deduplicator),
      type_name_deduplicator_(type_name_deduplicator),
      breakdown_threshold_bytes_(breakdown_threshold_bytes) {
}

HeapDumpWriter::~HeapDumpWriter() {}

bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) {
  // The contexts in the bucket are all different, but the [begin, cursor)
  // range of the backtrace is equal for all contexts in the bucket, and the
  // type names are the same if |is_broken_down_by_type_name| is set.
  DCHECK(!bucket.metrics_by_context.empty());

  const AllocationContext* context = bucket.metrics_by_context.front().first;

  const StackFrame* backtrace_begin = std::begin(context->backtrace.frames);
  const StackFrame* backtrace_end = backtrace_begin + bucket.backtrace_cursor;
  DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames));

  Entry entry;
  entry.stack_frame_id = stack_frame_deduplicator_->Insert(
      backtrace_begin, backtrace_end);

  // Deduplicate the type name, or use ID -1 if the type name is not set.
  entry.type_id = bucket.is_broken_down_by_type_name
                      ? type_name_deduplicator_->Insert(context->type_name)
                      : -1;

  entry.size = bucket.size;
  entry.count = bucket.count;

  auto position_and_inserted = entries_.insert(entry);
  return position_and_inserted.second;
}

void HeapDumpWriter::BreakDown(const Bucket& bucket) {
  auto by_backtrace = BreakDownBy(bucket,
                                  BreakDownMode::kByBacktrace,
                                  breakdown_threshold_bytes_);
  auto by_type_name = BreakDownBy(bucket,
                                  BreakDownMode::kByTypeName,
                                  breakdown_threshold_bytes_);

  // Insert entries for the buckets. If a bucket was not present before, it has
  // not been broken down before, so recursively continue breaking down in that
  // case. There might be multiple routes to the same entry (first break down
  // by type name, then by backtrace, or first by backtrace and then by type),
  // so a set is used to avoid dumping and breaking down entries more than
  // once.

  for (const Bucket& subbucket : by_backtrace)
    if (AddEntryForBucket(subbucket))
      BreakDown(subbucket);

  for (const Bucket& subbucket : by_type_name)
    if (AddEntryForBucket(subbucket))
      BreakDown(subbucket);
}

const std::set<Entry>& HeapDumpWriter::Summarize(
    const hash_map<AllocationContext, AllocationMetrics>& metrics_by_context) {
  // Start with one bucket that represents the entire heap. Iterate by
  // reference, because the bucket stores pointers to the allocation contexts
  // in |metrics_by_context|.
  Bucket root_bucket;
  for (const auto& context_and_metrics : metrics_by_context) {
    DCHECK_GT(context_and_metrics.second.size, 0u);
    DCHECK_GT(context_and_metrics.second.count, 0u);
    const AllocationContext* context = &context_and_metrics.first;
    root_bucket.metrics_by_context.push_back(
        std::make_pair(context, context_and_metrics.second));
    root_bucket.size += context_and_metrics.second.size;
    root_bucket.count += context_and_metrics.second.count;
  }

  AddEntryForBucket(root_bucket);

  // Recursively break down the heap and fill |entries_| with entries to dump.
  BreakDown(root_bucket);

  return entries_;
}
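
// Serializes the entries into the "entries" array of a TracedValue. As an
// illustrative sketch (the ID and size values below are made up), a single
// entry is emitted roughly as
//
//   {"size": "9c0", "count": "2a", "bt": "7", "type": "3"}
//
// where "size" and "count" are hexadecimal strings, "bt" is the index of the
// deduplicated leaf stack frame (the empty string for an empty backtrace), and
// "type" is omitted when the entry covers all types.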
std::unique_ptr<TracedValue> Serialize(const std::set<Entry>& entries) {
  std::string buffer;
  std::unique_ptr<TracedValue> traced_value(new TracedValue);

  traced_value->BeginArray("entries");

  for (const Entry& entry : entries) {
    traced_value->BeginDictionary();

    // Format size as hexadecimal string into |buffer|.
    SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));
    traced_value->SetString("size", buffer);

    SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.count));
    traced_value->SetString("count", buffer);

    if (entry.stack_frame_id == -1) {
      // An empty backtrace (which will have ID -1) is represented by the empty
      // string, because there is no leaf frame to reference in |stackFrames|.
      traced_value->SetString("bt", "");
    } else {
      // Format index of the leaf frame as a string, because |stackFrames| is a
      // dictionary, not an array.
      SStringPrintf(&buffer, "%i", entry.stack_frame_id);
      traced_value->SetString("bt", buffer);
    }

    // Type ID -1 (cumulative size for all types) is represented by the absence
    // of the "type" key in the dictionary.
    if (entry.type_id != -1) {
      // Format the type ID as a string.
      SStringPrintf(&buffer, "%i", entry.type_id);
      traced_value->SetString("type", buffer);
    }

    traced_value->EndDictionary();
  }

  traced_value->EndArray();  // "entries"
  return traced_value;
}

}  // namespace internal

std::unique_ptr<TracedValue> ExportHeapDump(
    const hash_map<AllocationContext, AllocationMetrics>& metrics_by_context,
    const MemoryDumpSessionState& session_state) {
  internal::HeapDumpWriter writer(
      session_state.stack_frame_deduplicator(),
      session_state.type_name_deduplicator(),
      session_state.memory_dump_config().heap_profiler_options
          .breakdown_threshold_bytes);
  return Serialize(writer.Summarize(metrics_by_context));
}

}  // namespace trace_event
}  // namespace base