1/* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "command.h" 18 19#include <unordered_map> 20 21#include <android-base/logging.h> 22#include <android-base/strings.h> 23 24#include "callchain.h" 25#include "event_attr.h" 26#include "event_type.h" 27#include "record_file.h" 28#include "sample_tree.h" 29#include "tracing.h" 30#include "utils.h" 31 32namespace { 33 34struct SlabSample { 35 const Symbol* symbol; // the function making allocation 36 uint64_t ptr; // the start address of the allocated space 37 uint64_t bytes_req; // requested space size 38 uint64_t bytes_alloc; // allocated space size 39 uint64_t sample_count; // count of allocations 40 uint64_t gfp_flags; // flags used for allocation 41 uint64_t cross_cpu_allocations; // count of allocations freed not on the 42 // cpu allocating them 43 CallChainRoot<SlabSample> callchain; // a callchain tree representing all 44 // callchains in this sample 45 SlabSample(const Symbol* symbol, uint64_t ptr, uint64_t bytes_req, 46 uint64_t bytes_alloc, uint64_t sample_count, uint64_t gfp_flags, 47 uint64_t cross_cpu_allocations) 48 : symbol(symbol), 49 ptr(ptr), 50 bytes_req(bytes_req), 51 bytes_alloc(bytes_alloc), 52 sample_count(sample_count), 53 gfp_flags(gfp_flags), 54 cross_cpu_allocations(cross_cpu_allocations) {} 55 56 uint64_t GetPeriod() const { 57 return sample_count; 58 } 59}; 60 61struct SlabAccumulateInfo { 62 uint64_t bytes_req; 63 uint64_t bytes_alloc; 64}; 65 66BUILD_COMPARE_VALUE_FUNCTION(ComparePtr, ptr); 67BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesReq, bytes_req); 68BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesAlloc, bytes_alloc); 69BUILD_COMPARE_VALUE_FUNCTION(CompareGfpFlags, gfp_flags); 70BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareCrossCpuAllocations, 71 cross_cpu_allocations); 72 73BUILD_DISPLAY_HEX64_FUNCTION(DisplayPtr, ptr); 74BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesReq, bytes_req); 75BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesAlloc, bytes_alloc); 76BUILD_DISPLAY_HEX64_FUNCTION(DisplayGfpFlags, gfp_flags); 77BUILD_DISPLAY_UINT64_FUNCTION(DisplayCrossCpuAllocations, 78 cross_cpu_allocations); 79 80static int CompareFragment(const SlabSample* sample1, 81 const SlabSample* sample2) { 82 uint64_t frag1 = sample1->bytes_alloc - sample1->bytes_req; 83 uint64_t frag2 = sample2->bytes_alloc - sample2->bytes_req; 84 return Compare(frag2, frag1); 85} 86 87static std::string DisplayFragment(const SlabSample* sample) { 88 return android::base::StringPrintf("%" PRIu64, 89 sample->bytes_alloc - sample->bytes_req); 90} 91 92struct SlabSampleTree { 93 std::vector<SlabSample*> samples; 94 uint64_t total_requested_bytes; 95 uint64_t total_allocated_bytes; 96 uint64_t nr_allocations; 97 uint64_t nr_frees; 98 uint64_t nr_cross_cpu_allocations; 99}; 100 101struct SlabFormat { 102 enum { 103 KMEM_ALLOC, 104 KMEM_FREE, 105 } type; 106 TracingFieldPlace call_site; 107 TracingFieldPlace ptr; 108 TracingFieldPlace bytes_req; 109 TracingFieldPlace bytes_alloc; 110 TracingFieldPlace gfp_flags; 111}; 112 113class SlabSampleTreeBuilder 114 : public SampleTreeBuilder<SlabSample, SlabAccumulateInfo> { 115 public: 116 SlabSampleTreeBuilder(SampleComparator<SlabSample> sample_comparator, 117 ThreadTree* thread_tree) 118 : SampleTreeBuilder(sample_comparator), 119 thread_tree_(thread_tree), 120 total_requested_bytes_(0), 121 total_allocated_bytes_(0), 122 nr_allocations_(0), 123 nr_cross_cpu_allocations_(0) {} 124 125 SlabSampleTree GetSampleTree() const { 126 SlabSampleTree sample_tree; 127 sample_tree.samples = GetSamples(); 128 sample_tree.total_requested_bytes = total_requested_bytes_; 129 sample_tree.total_allocated_bytes = total_allocated_bytes_; 130 sample_tree.nr_allocations = nr_allocations_; 131 sample_tree.nr_frees = nr_frees_; 132 sample_tree.nr_cross_cpu_allocations = nr_cross_cpu_allocations_; 133 return sample_tree; 134 } 135 136 void AddSlabFormat(const std::vector<uint64_t>& event_ids, 137 SlabFormat format) { 138 std::unique_ptr<SlabFormat> p(new SlabFormat(format)); 139 for (auto id : event_ids) { 140 event_id_to_format_map_[id] = p.get(); 141 } 142 formats_.push_back(std::move(p)); 143 } 144 145 protected: 146 SlabSample* CreateSample(const SampleRecord& r, bool in_kernel, 147 SlabAccumulateInfo* acc_info) override { 148 if (!in_kernel) { 149 // Normally we don't parse records in user space because tracepoint 150 // events all happen in kernel. But if r.ip_data.ip == 0, it may be 151 // a kernel record failed to dump ip register and is still useful. 152 if (r.ip_data.ip == 0) { 153 // It seems we are on a kernel can't dump regset for tracepoint events 154 // because of lacking perf_arch_fetch_caller_regs(). We can't get 155 // callchain, but we can still do a normal report. 156 static bool first = true; 157 if (first) { 158 first = false; 159 if (accumulate_callchain_) { 160 // The kernel doesn't seem to support dumping registers for 161 // tracepoint events because of lacking 162 // perf_arch_fetch_caller_regs(). 163 LOG(WARNING) << "simpleperf may not get callchains for tracepoint" 164 << " events because of lacking kernel support."; 165 } 166 } 167 } else { 168 return nullptr; 169 } 170 } 171 uint64_t id = r.id_data.id; 172 auto it = event_id_to_format_map_.find(id); 173 if (it == event_id_to_format_map_.end()) { 174 return nullptr; 175 } 176 const char* raw_data = r.raw_data.data; 177 SlabFormat* format = it->second; 178 if (format->type == SlabFormat::KMEM_ALLOC) { 179 uint64_t call_site = format->call_site.ReadFromData(raw_data); 180 const Symbol* symbol = thread_tree_->FindKernelSymbol(call_site); 181 uint64_t ptr = format->ptr.ReadFromData(raw_data); 182 uint64_t bytes_req = format->bytes_req.ReadFromData(raw_data); 183 uint64_t bytes_alloc = format->bytes_alloc.ReadFromData(raw_data); 184 uint64_t gfp_flags = format->gfp_flags.ReadFromData(raw_data); 185 SlabSample* sample = 186 InsertSample(std::unique_ptr<SlabSample>(new SlabSample( 187 symbol, ptr, bytes_req, bytes_alloc, 1, gfp_flags, 0))); 188 alloc_cpu_record_map_.insert( 189 std::make_pair(ptr, std::make_pair(r.cpu_data.cpu, sample))); 190 acc_info->bytes_req = bytes_req; 191 acc_info->bytes_alloc = bytes_alloc; 192 return sample; 193 } else if (format->type == SlabFormat::KMEM_FREE) { 194 uint64_t ptr = format->ptr.ReadFromData(raw_data); 195 auto it = alloc_cpu_record_map_.find(ptr); 196 if (it != alloc_cpu_record_map_.end()) { 197 SlabSample* sample = it->second.second; 198 if (r.cpu_data.cpu != it->second.first) { 199 sample->cross_cpu_allocations++; 200 nr_cross_cpu_allocations_++; 201 } 202 alloc_cpu_record_map_.erase(it); 203 } 204 nr_frees_++; 205 } 206 return nullptr; 207 } 208 209 SlabSample* CreateBranchSample(const SampleRecord&, 210 const BranchStackItemType&) override { 211 return nullptr; 212 } 213 214 SlabSample* CreateCallChainSample( 215 const SlabSample* sample, uint64_t ip, bool in_kernel, 216 const std::vector<SlabSample*>& callchain, 217 const SlabAccumulateInfo& acc_info) override { 218 if (!in_kernel) { 219 return nullptr; 220 } 221 const Symbol* symbol = thread_tree_->FindKernelSymbol(ip); 222 return InsertCallChainSample( 223 std::unique_ptr<SlabSample>( 224 new SlabSample(symbol, sample->ptr, acc_info.bytes_req, 225 acc_info.bytes_alloc, 1, sample->gfp_flags, 0)), 226 callchain); 227 } 228 229 const ThreadEntry* GetThreadOfSample(SlabSample*) override { return nullptr; } 230 231 uint64_t GetPeriodForCallChain(const SlabAccumulateInfo&) override { 232 // Decide the percentage of callchain by the sample_count, so use 1 as the 233 // period when calling AddCallChain(). 234 return 1; 235 } 236 237 void UpdateSummary(const SlabSample* sample) override { 238 total_requested_bytes_ += sample->bytes_req; 239 total_allocated_bytes_ += sample->bytes_alloc; 240 nr_allocations_++; 241 } 242 243 void MergeSample(SlabSample* sample1, SlabSample* sample2) override { 244 sample1->bytes_req += sample2->bytes_req; 245 sample1->bytes_alloc += sample2->bytes_alloc; 246 sample1->sample_count += sample2->sample_count; 247 } 248 249 private: 250 ThreadTree* thread_tree_; 251 uint64_t total_requested_bytes_; 252 uint64_t total_allocated_bytes_; 253 uint64_t nr_allocations_; 254 uint64_t nr_frees_; 255 uint64_t nr_cross_cpu_allocations_; 256 257 std::unordered_map<uint64_t, SlabFormat*> event_id_to_format_map_; 258 std::vector<std::unique_ptr<SlabFormat>> formats_; 259 std::unordered_map<uint64_t, std::pair<uint32_t, SlabSample*>> 260 alloc_cpu_record_map_; 261}; 262 263using SlabSampleTreeSorter = SampleTreeSorter<SlabSample>; 264using SlabSampleTreeDisplayer = SampleTreeDisplayer<SlabSample, SlabSampleTree>; 265using SlabSampleCallgraphDisplayer = 266 CallgraphDisplayer<SlabSample, CallChainNode<SlabSample>>; 267 268struct EventAttrWithName { 269 perf_event_attr attr; 270 std::string name; 271 std::vector<uint64_t> event_ids; 272}; 273 274class KmemCommand : public Command { 275 public: 276 KmemCommand() 277 : Command( 278 "kmem", "collect kernel memory allocation information", 279 // clang-format off 280"Usage: kmem (record [record options] | report [report options])\n" 281"kmem record\n" 282"-g Enable call graph recording. Same as '--call-graph fp'.\n" 283"--slab Collect slab allocation information. Default option.\n" 284"Other record options provided by simpleperf record command are also available.\n" 285"kmem report\n" 286"--children Print the accumulated allocation info appeared in the callchain.\n" 287" Can be used on perf.data recorded with `--call-graph fp` option.\n" 288"-g [callee|caller] Print call graph for perf.data recorded with\n" 289" `--call-graph fp` option. If callee mode is used, the graph\n" 290" shows how functions are called from others. Otherwise, the\n" 291" graph shows how functions call others. Default is callee\n" 292" mode. The percentage shown in the graph is determined by\n" 293" the hit count of the callchain.\n" 294"-i Specify path of record file, default is perf.data\n" 295"-o report_file_name Set report file name, default is stdout.\n" 296"--slab Report slab allocation information. Default option.\n" 297"--slab-sort key1,key2,...\n" 298" Select the keys to sort and print slab allocation information.\n" 299" Should be used with --slab option. Possible keys include:\n" 300" hit -- the allocation count.\n" 301" caller -- the function calling allocation.\n" 302" ptr -- the address of the allocated space.\n" 303" bytes_req -- the total requested space size.\n" 304" bytes_alloc -- the total allocated space size.\n" 305" fragment -- the extra allocated space size\n" 306" (bytes_alloc - bytes_req).\n" 307" gfp_flags -- the flags used for allocation.\n" 308" pingpong -- the count of allocations that are freed not on\n" 309" the cpu allocating them.\n" 310" The default slab sort keys are:\n" 311" hit,caller,bytes_req,bytes_alloc,fragment,pingpong.\n" 312 // clang-format on 313 ), 314 is_record_(false), 315 use_slab_(false), 316 accumulate_callchain_(false), 317 print_callgraph_(false), 318 callgraph_show_callee_(false), 319 record_filename_("perf.data"), 320 record_file_arch_(GetBuildArch()) {} 321 322 bool Run(const std::vector<std::string>& args); 323 324 private: 325 bool ParseOptions(const std::vector<std::string>& args, 326 std::vector<std::string>* left_args); 327 bool RecordKmemInfo(const std::vector<std::string>& record_args); 328 bool ReportKmemInfo(); 329 bool PrepareToBuildSampleTree(); 330 void ReadEventAttrsFromRecordFile(); 331 bool ReadFeaturesFromRecordFile(); 332 bool ReadSampleTreeFromRecordFile(); 333 bool ProcessRecord(std::unique_ptr<Record> record); 334 void ProcessTracingData(const std::vector<char>& data); 335 bool PrintReport(); 336 void PrintReportContext(FILE* fp); 337 void PrintSlabReportContext(FILE* fp); 338 339 bool is_record_; 340 bool use_slab_; 341 std::vector<std::string> slab_sort_keys_; 342 bool accumulate_callchain_; 343 bool print_callgraph_; 344 bool callgraph_show_callee_; 345 346 std::string record_filename_; 347 std::unique_ptr<RecordFileReader> record_file_reader_; 348 std::vector<EventAttrWithName> event_attrs_; 349 std::string record_cmdline_; 350 ArchType record_file_arch_; 351 352 ThreadTree thread_tree_; 353 SlabSampleTree slab_sample_tree_; 354 std::unique_ptr<SlabSampleTreeBuilder> slab_sample_tree_builder_; 355 std::unique_ptr<SlabSampleTreeSorter> slab_sample_tree_sorter_; 356 std::unique_ptr<SlabSampleTreeDisplayer> slab_sample_tree_displayer_; 357 358 std::string report_filename_; 359}; 360 361bool KmemCommand::Run(const std::vector<std::string>& args) { 362 std::vector<std::string> left_args; 363 if (!ParseOptions(args, &left_args)) { 364 return false; 365 } 366 if (!use_slab_) { 367 use_slab_ = true; 368 } 369 if (is_record_) { 370 return RecordKmemInfo(left_args); 371 } 372 return ReportKmemInfo(); 373} 374 375bool KmemCommand::ParseOptions(const std::vector<std::string>& args, 376 std::vector<std::string>* left_args) { 377 if (args.empty()) { 378 LOG(ERROR) << "No subcommand specified"; 379 return false; 380 } 381 if (args[0] == "record") { 382 if (!IsRoot()) { 383 LOG(ERROR) << "simpleperf kmem record command needs root privilege"; 384 return false; 385 } 386 is_record_ = true; 387 size_t i; 388 for (i = 1; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) { 389 if (args[i] == "-g") { 390 left_args->push_back("--call-graph"); 391 left_args->push_back("fp"); 392 } else if (args[i] == "--slab") { 393 use_slab_ = true; 394 } else { 395 left_args->push_back(args[i]); 396 } 397 } 398 left_args->insert(left_args->end(), args.begin() + i, args.end()); 399 } else if (args[0] == "report") { 400 is_record_ = false; 401 for (size_t i = 1; i < args.size(); ++i) { 402 if (args[i] == "--children") { 403 accumulate_callchain_ = true; 404 } else if (args[i] == "-g") { 405 print_callgraph_ = true; 406 accumulate_callchain_ = true; 407 callgraph_show_callee_ = true; 408 if (i + 1 < args.size() && args[i + 1][0] != '-') { 409 ++i; 410 if (args[i] == "callee") { 411 callgraph_show_callee_ = true; 412 } else if (args[i] == "caller") { 413 callgraph_show_callee_ = false; 414 } else { 415 LOG(ERROR) << "Unknown argument with -g option: " << args[i]; 416 return false; 417 } 418 } 419 } else if (args[i] == "-i") { 420 if (!NextArgumentOrError(args, &i)) { 421 return false; 422 } 423 record_filename_ = args[i]; 424 } else if (args[i] == "-o") { 425 if (!NextArgumentOrError(args, &i)) { 426 return false; 427 } 428 report_filename_ = args[i]; 429 } else if (args[i] == "--slab") { 430 use_slab_ = true; 431 } else if (args[i] == "--slab-sort") { 432 if (!NextArgumentOrError(args, &i)) { 433 return false; 434 } 435 slab_sort_keys_ = android::base::Split(args[i], ","); 436 } else { 437 ReportUnknownOption(args, i); 438 return false; 439 } 440 } 441 } else { 442 LOG(ERROR) << "Unknown subcommand for " << Name() << ": " << args[0] 443 << ". Try `simpleperf help " << Name() << "`"; 444 return false; 445 } 446 return true; 447} 448 449bool KmemCommand::RecordKmemInfo(const std::vector<std::string>& record_args) { 450 std::vector<std::string> args; 451 if (use_slab_) { 452 std::vector<std::string> trace_events = { 453 "kmem:kmalloc", "kmem:kmem_cache_alloc", 454 "kmem:kmalloc_node", "kmem:kmem_cache_alloc_node", 455 "kmem:kfree", "kmem:kmem_cache_free"}; 456 for (const auto& name : trace_events) { 457 if (ParseEventType(name)) { 458 args.insert(args.end(), {"-e", name}); 459 } 460 } 461 } 462 if (args.empty()) { 463 LOG(ERROR) << "Kernel allocation related trace events are not supported."; 464 return false; 465 } 466 args.push_back("-a"); 467 args.insert(args.end(), record_args.begin(), record_args.end()); 468 std::unique_ptr<Command> record_cmd = CreateCommandInstance("record"); 469 if (record_cmd == nullptr) { 470 LOG(ERROR) << "record command isn't available"; 471 return false; 472 } 473 return record_cmd->Run(args); 474} 475 476bool KmemCommand::ReportKmemInfo() { 477 if (!PrepareToBuildSampleTree()) { 478 return false; 479 } 480 record_file_reader_ = RecordFileReader::CreateInstance(record_filename_); 481 if (record_file_reader_ == nullptr) { 482 return false; 483 } 484 ReadEventAttrsFromRecordFile(); 485 if (!ReadFeaturesFromRecordFile()) { 486 return false; 487 } 488 if (!ReadSampleTreeFromRecordFile()) { 489 return false; 490 } 491 if (!PrintReport()) { 492 return false; 493 } 494 return true; 495} 496 497bool KmemCommand::PrepareToBuildSampleTree() { 498 if (use_slab_) { 499 if (slab_sort_keys_.empty()) { 500 slab_sort_keys_ = {"hit", "caller", "bytes_req", 501 "bytes_alloc", "fragment", "pingpong"}; 502 } 503 SampleComparator<SlabSample> comparator; 504 SampleComparator<SlabSample> sort_comparator; 505 SampleDisplayer<SlabSample, SlabSampleTree> displayer; 506 std::string accumulated_name = accumulate_callchain_ ? "Accumulated_" : ""; 507 508 if (print_callgraph_) { 509 displayer.AddExclusiveDisplayFunction(SlabSampleCallgraphDisplayer()); 510 } 511 512 for (const auto& key : slab_sort_keys_) { 513 if (key == "hit") { 514 sort_comparator.AddCompareFunction(CompareSampleCount); 515 displayer.AddDisplayFunction(accumulated_name + "Hit", 516 DisplaySampleCount); 517 } else if (key == "caller") { 518 comparator.AddCompareFunction(CompareSymbol); 519 displayer.AddDisplayFunction("Caller", DisplaySymbol); 520 } else if (key == "ptr") { 521 comparator.AddCompareFunction(ComparePtr); 522 displayer.AddDisplayFunction("Ptr", DisplayPtr); 523 } else if (key == "bytes_req") { 524 sort_comparator.AddCompareFunction(CompareBytesReq); 525 displayer.AddDisplayFunction(accumulated_name + "BytesReq", 526 DisplayBytesReq); 527 } else if (key == "bytes_alloc") { 528 sort_comparator.AddCompareFunction(CompareBytesAlloc); 529 displayer.AddDisplayFunction(accumulated_name + "BytesAlloc", 530 DisplayBytesAlloc); 531 } else if (key == "fragment") { 532 sort_comparator.AddCompareFunction(CompareFragment); 533 displayer.AddDisplayFunction(accumulated_name + "Fragment", 534 DisplayFragment); 535 } else if (key == "gfp_flags") { 536 comparator.AddCompareFunction(CompareGfpFlags); 537 displayer.AddDisplayFunction("GfpFlags", DisplayGfpFlags); 538 } else if (key == "pingpong") { 539 sort_comparator.AddCompareFunction(CompareCrossCpuAllocations); 540 displayer.AddDisplayFunction("Pingpong", DisplayCrossCpuAllocations); 541 } else { 542 LOG(ERROR) << "Unknown sort key for slab allocation: " << key; 543 return false; 544 } 545 slab_sample_tree_builder_.reset( 546 new SlabSampleTreeBuilder(comparator, &thread_tree_)); 547 slab_sample_tree_builder_->SetCallChainSampleOptions( 548 accumulate_callchain_, print_callgraph_, !callgraph_show_callee_, 549 false); 550 sort_comparator.AddComparator(comparator); 551 slab_sample_tree_sorter_.reset(new SlabSampleTreeSorter(sort_comparator)); 552 slab_sample_tree_displayer_.reset(new SlabSampleTreeDisplayer(displayer)); 553 } 554 } 555 return true; 556} 557 558void KmemCommand::ReadEventAttrsFromRecordFile() { 559 std::vector<EventAttrWithId> attrs = record_file_reader_->AttrSection(); 560 for (const auto& attr_with_id : attrs) { 561 EventAttrWithName attr; 562 attr.attr = *attr_with_id.attr; 563 attr.event_ids = attr_with_id.ids; 564 attr.name = GetEventNameByAttr(attr.attr); 565 event_attrs_.push_back(attr); 566 } 567} 568 569bool KmemCommand::ReadFeaturesFromRecordFile() { 570 record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_); 571 std::string arch = 572 record_file_reader_->ReadFeatureString(PerfFileFormat::FEAT_ARCH); 573 if (!arch.empty()) { 574 record_file_arch_ = GetArchType(arch); 575 if (record_file_arch_ == ARCH_UNSUPPORTED) { 576 return false; 577 } 578 } 579 std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature(); 580 if (!cmdline.empty()) { 581 record_cmdline_ = android::base::Join(cmdline, ' '); 582 } 583 if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_TRACING_DATA)) { 584 std::vector<char> tracing_data; 585 if (!record_file_reader_->ReadFeatureSection( 586 PerfFileFormat::FEAT_TRACING_DATA, &tracing_data)) { 587 return false; 588 } 589 ProcessTracingData(tracing_data); 590 } 591 return true; 592} 593 594bool KmemCommand::ReadSampleTreeFromRecordFile() { 595 if (!record_file_reader_->ReadDataSection( 596 [this](std::unique_ptr<Record> record) { 597 return ProcessRecord(std::move(record)); 598 })) { 599 return false; 600 } 601 if (use_slab_) { 602 slab_sample_tree_ = slab_sample_tree_builder_->GetSampleTree(); 603 slab_sample_tree_sorter_->Sort(slab_sample_tree_.samples, print_callgraph_); 604 } 605 return true; 606} 607 608bool KmemCommand::ProcessRecord(std::unique_ptr<Record> record) { 609 thread_tree_.Update(*record); 610 if (record->type() == PERF_RECORD_SAMPLE) { 611 if (use_slab_) { 612 slab_sample_tree_builder_->ProcessSampleRecord( 613 *static_cast<const SampleRecord*>(record.get())); 614 } 615 } else if (record->type() == PERF_RECORD_TRACING_DATA) { 616 const auto& r = *static_cast<TracingDataRecord*>(record.get()); 617 ProcessTracingData(std::vector<char>(r.data, r.data + r.data_size)); 618 } 619 return true; 620} 621 622void KmemCommand::ProcessTracingData(const std::vector<char>& data) { 623 Tracing tracing(data); 624 for (auto& attr : event_attrs_) { 625 if (attr.attr.type == PERF_TYPE_TRACEPOINT) { 626 uint64_t trace_event_id = attr.attr.config; 627 attr.name = tracing.GetTracingEventNameHavingId(trace_event_id); 628 TracingFormat format = tracing.GetTracingFormatHavingId(trace_event_id); 629 if (use_slab_) { 630 if (format.name == "kmalloc" || format.name == "kmem_cache_alloc" || 631 format.name == "kmalloc_node" || 632 format.name == "kmem_cache_alloc_node") { 633 SlabFormat f; 634 f.type = SlabFormat::KMEM_ALLOC; 635 format.GetField("call_site", f.call_site); 636 format.GetField("ptr", f.ptr); 637 format.GetField("bytes_req", f.bytes_req); 638 format.GetField("bytes_alloc", f.bytes_alloc); 639 format.GetField("gfp_flags", f.gfp_flags); 640 slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f); 641 } else if (format.name == "kfree" || format.name == "kmem_cache_free") { 642 SlabFormat f; 643 f.type = SlabFormat::KMEM_FREE; 644 format.GetField("call_site", f.call_site); 645 format.GetField("ptr", f.ptr); 646 slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f); 647 } 648 } 649 } 650 } 651} 652 653bool KmemCommand::PrintReport() { 654 std::unique_ptr<FILE, decltype(&fclose)> file_handler(nullptr, fclose); 655 FILE* report_fp = stdout; 656 if (!report_filename_.empty()) { 657 file_handler.reset(fopen(report_filename_.c_str(), "w")); 658 if (file_handler == nullptr) { 659 PLOG(ERROR) << "failed to open " << report_filename_; 660 return false; 661 } 662 report_fp = file_handler.get(); 663 } 664 PrintReportContext(report_fp); 665 if (use_slab_) { 666 fprintf(report_fp, "\n\n"); 667 PrintSlabReportContext(report_fp); 668 slab_sample_tree_displayer_->DisplaySamples( 669 report_fp, slab_sample_tree_.samples, &slab_sample_tree_); 670 } 671 return true; 672} 673 674void KmemCommand::PrintReportContext(FILE* fp) { 675 if (!record_cmdline_.empty()) { 676 fprintf(fp, "Cmdline: %s\n", record_cmdline_.c_str()); 677 } 678 fprintf(fp, "Arch: %s\n", GetArchString(record_file_arch_).c_str()); 679 for (const auto& attr : event_attrs_) { 680 fprintf(fp, "Event: %s (type %u, config %llu)\n", attr.name.c_str(), 681 attr.attr.type, attr.attr.config); 682 } 683} 684 685void KmemCommand::PrintSlabReportContext(FILE* fp) { 686 fprintf(fp, "Slab allocation information:\n"); 687 fprintf(fp, "Total requested bytes: %" PRIu64 "\n", 688 slab_sample_tree_.total_requested_bytes); 689 fprintf(fp, "Total allocated bytes: %" PRIu64 "\n", 690 slab_sample_tree_.total_allocated_bytes); 691 uint64_t fragment = slab_sample_tree_.total_allocated_bytes - 692 slab_sample_tree_.total_requested_bytes; 693 double percentage = 0.0; 694 if (slab_sample_tree_.total_allocated_bytes != 0) { 695 percentage = 100.0 * fragment / slab_sample_tree_.total_allocated_bytes; 696 } 697 fprintf(fp, "Total fragment: %" PRIu64 ", %f%%\n", fragment, percentage); 698 fprintf(fp, "Total allocations: %" PRIu64 "\n", 699 slab_sample_tree_.nr_allocations); 700 fprintf(fp, "Total frees: %" PRIu64 "\n", slab_sample_tree_.nr_frees); 701 percentage = 0.0; 702 if (slab_sample_tree_.nr_allocations != 0) { 703 percentage = 100.0 * slab_sample_tree_.nr_cross_cpu_allocations / 704 slab_sample_tree_.nr_allocations; 705 } 706 fprintf(fp, "Total cross cpu allocation/free: %" PRIu64 ", %f%%\n", 707 slab_sample_tree_.nr_cross_cpu_allocations, percentage); 708 fprintf(fp, "\n"); 709} 710 711} // namespace 712 713void RegisterKmemCommand() { 714 RegisterCommand("kmem", 715 [] { return std::unique_ptr<Command>(new KmemCommand()); }); 716} 717