1/*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "command.h"
18
19#include <unordered_map>
20
21#include <android-base/logging.h>
22#include <android-base/strings.h>
23
24#include "callchain.h"
25#include "event_attr.h"
26#include "event_type.h"
27#include "record_file.h"
28#include "sample_tree.h"
29#include "tracing.h"
30#include "utils.h"
31
32namespace {
33
34struct SlabSample {
35  const Symbol* symbol;            // the function making allocation
36  uint64_t ptr;                    // the start address of the allocated space
37  uint64_t bytes_req;              // requested space size
38  uint64_t bytes_alloc;            // allocated space size
39  uint64_t sample_count;           // count of allocations
40  uint64_t gfp_flags;              // flags used for allocation
41  uint64_t cross_cpu_allocations;  // count of allocations freed not on the
42                                   // cpu allocating them
43  CallChainRoot<SlabSample> callchain;  // a callchain tree representing all
44                                        // callchains in this sample
45  SlabSample(const Symbol* symbol, uint64_t ptr, uint64_t bytes_req,
46             uint64_t bytes_alloc, uint64_t sample_count, uint64_t gfp_flags,
47             uint64_t cross_cpu_allocations)
48      : symbol(symbol),
49        ptr(ptr),
50        bytes_req(bytes_req),
51        bytes_alloc(bytes_alloc),
52        sample_count(sample_count),
53        gfp_flags(gfp_flags),
54        cross_cpu_allocations(cross_cpu_allocations) {}
55
56  uint64_t GetPeriod() const {
57    return sample_count;
58  }
59};
60
// Per-record values carried from SlabSampleTreeBuilder::CreateSample() to
// CreateCallChainSample(), so that every callchain entry created for an
// allocation is given the byte counts of that allocation.
struct SlabAccumulateInfo {
  uint64_t bytes_req;    // requested size of the allocation being processed
  uint64_t bytes_alloc;  // allocated size of the allocation being processed
};
65
// Comparison helpers for individual SlabSample fields, generated by macros
// defined outside this file. They are registered as sort/group functions in
// KmemCommand::PrepareToBuildSampleTree().
// NOTE(review): presumably the *_REVERSE variants order larger values first --
// confirm against the macro definitions.
BUILD_COMPARE_VALUE_FUNCTION(ComparePtr, ptr);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesReq, bytes_req);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesAlloc, bytes_alloc);
BUILD_COMPARE_VALUE_FUNCTION(CompareGfpFlags, gfp_flags);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareCrossCpuAllocations,
                                     cross_cpu_allocations);

// Display helpers for individual SlabSample fields, also generated by macros
// defined outside this file. They are registered as report columns in
// KmemCommand::PrepareToBuildSampleTree().
BUILD_DISPLAY_HEX64_FUNCTION(DisplayPtr, ptr);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesReq, bytes_req);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesAlloc, bytes_alloc);
BUILD_DISPLAY_HEX64_FUNCTION(DisplayGfpFlags, gfp_flags);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayCrossCpuAllocations,
                              cross_cpu_allocations);
79
80static int CompareFragment(const SlabSample* sample1,
81                           const SlabSample* sample2) {
82  uint64_t frag1 = sample1->bytes_alloc - sample1->bytes_req;
83  uint64_t frag2 = sample2->bytes_alloc - sample2->bytes_req;
84  return Compare(frag2, frag1);
85}
86
87static std::string DisplayFragment(const SlabSample* sample) {
88  return android::base::StringPrintf("%" PRIu64,
89                                     sample->bytes_alloc - sample->bytes_req);
90}
91
// Snapshot of the result produced by SlabSampleTreeBuilder::GetSampleTree():
// the samples plus the summary counters printed by
// KmemCommand::PrintSlabReportContext().
struct SlabSampleTree {
  std::vector<SlabSample*> samples;   // samples returned by the builder
  uint64_t total_requested_bytes;     // sum of bytes_req over allocations
  uint64_t total_allocated_bytes;     // sum of bytes_alloc over allocations
  uint64_t nr_allocations;            // number of allocations summarized
  uint64_t nr_frees;                  // number of free events seen
  uint64_t nr_cross_cpu_allocations;  // allocations freed on another cpu
};
100
// Describes how to decode the raw data of one kind of kmem tracepoint event.
// Filled in KmemCommand::ProcessTracingData() and consumed by
// SlabSampleTreeBuilder::CreateSample().
struct SlabFormat {
  // Whether the event reports an allocation or a free.
  enum {
    KMEM_ALLOC,
    KMEM_FREE,
  } type;
  // Places of the tracepoint fields inside the raw sample data. bytes_req,
  // bytes_alloc and gfp_flags are only filled for KMEM_ALLOC formats.
  TracingFieldPlace call_site;
  TracingFieldPlace ptr;
  TracingFieldPlace bytes_req;
  TracingFieldPlace bytes_alloc;
  TracingFieldPlace gfp_flags;
};
112
113class SlabSampleTreeBuilder
114    : public SampleTreeBuilder<SlabSample, SlabAccumulateInfo> {
115 public:
116  SlabSampleTreeBuilder(SampleComparator<SlabSample> sample_comparator,
117                        ThreadTree* thread_tree)
118      : SampleTreeBuilder(sample_comparator),
119        thread_tree_(thread_tree),
120        total_requested_bytes_(0),
121        total_allocated_bytes_(0),
122        nr_allocations_(0),
123        nr_cross_cpu_allocations_(0) {}
124
125  SlabSampleTree GetSampleTree() const {
126    SlabSampleTree sample_tree;
127    sample_tree.samples = GetSamples();
128    sample_tree.total_requested_bytes = total_requested_bytes_;
129    sample_tree.total_allocated_bytes = total_allocated_bytes_;
130    sample_tree.nr_allocations = nr_allocations_;
131    sample_tree.nr_frees = nr_frees_;
132    sample_tree.nr_cross_cpu_allocations = nr_cross_cpu_allocations_;
133    return sample_tree;
134  }
135
136  void AddSlabFormat(const std::vector<uint64_t>& event_ids,
137                     SlabFormat format) {
138    std::unique_ptr<SlabFormat> p(new SlabFormat(format));
139    for (auto id : event_ids) {
140      event_id_to_format_map_[id] = p.get();
141    }
142    formats_.push_back(std::move(p));
143  }
144
145 protected:
146  SlabSample* CreateSample(const SampleRecord& r, bool in_kernel,
147                           SlabAccumulateInfo* acc_info) override {
148    if (!in_kernel) {
149      // Normally we don't parse records in user space because tracepoint
150      // events all happen in kernel. But if r.ip_data.ip == 0, it may be
151      // a kernel record failed to dump ip register and is still useful.
152      if (r.ip_data.ip == 0) {
153        // It seems we are on a kernel can't dump regset for tracepoint events
154        // because of lacking perf_arch_fetch_caller_regs(). We can't get
155        // callchain, but we can still do a normal report.
156        static bool first = true;
157        if (first) {
158          first = false;
159          if (accumulate_callchain_) {
160            // The kernel doesn't seem to support dumping registers for
161            // tracepoint events because of lacking
162            // perf_arch_fetch_caller_regs().
163            LOG(WARNING) << "simpleperf may not get callchains for tracepoint"
164                         << " events because of lacking kernel support.";
165          }
166        }
167      } else {
168        return nullptr;
169      }
170    }
171    uint64_t id = r.id_data.id;
172    auto it = event_id_to_format_map_.find(id);
173    if (it == event_id_to_format_map_.end()) {
174      return nullptr;
175    }
176    const char* raw_data = r.raw_data.data;
177    SlabFormat* format = it->second;
178    if (format->type == SlabFormat::KMEM_ALLOC) {
179      uint64_t call_site = format->call_site.ReadFromData(raw_data);
180      const Symbol* symbol = thread_tree_->FindKernelSymbol(call_site);
181      uint64_t ptr = format->ptr.ReadFromData(raw_data);
182      uint64_t bytes_req = format->bytes_req.ReadFromData(raw_data);
183      uint64_t bytes_alloc = format->bytes_alloc.ReadFromData(raw_data);
184      uint64_t gfp_flags = format->gfp_flags.ReadFromData(raw_data);
185      SlabSample* sample =
186          InsertSample(std::unique_ptr<SlabSample>(new SlabSample(
187              symbol, ptr, bytes_req, bytes_alloc, 1, gfp_flags, 0)));
188      alloc_cpu_record_map_.insert(
189          std::make_pair(ptr, std::make_pair(r.cpu_data.cpu, sample)));
190      acc_info->bytes_req = bytes_req;
191      acc_info->bytes_alloc = bytes_alloc;
192      return sample;
193    } else if (format->type == SlabFormat::KMEM_FREE) {
194      uint64_t ptr = format->ptr.ReadFromData(raw_data);
195      auto it = alloc_cpu_record_map_.find(ptr);
196      if (it != alloc_cpu_record_map_.end()) {
197        SlabSample* sample = it->second.second;
198        if (r.cpu_data.cpu != it->second.first) {
199          sample->cross_cpu_allocations++;
200          nr_cross_cpu_allocations_++;
201        }
202        alloc_cpu_record_map_.erase(it);
203      }
204      nr_frees_++;
205    }
206    return nullptr;
207  }
208
209  SlabSample* CreateBranchSample(const SampleRecord&,
210                                 const BranchStackItemType&) override {
211    return nullptr;
212  }
213
214  SlabSample* CreateCallChainSample(
215      const SlabSample* sample, uint64_t ip, bool in_kernel,
216      const std::vector<SlabSample*>& callchain,
217      const SlabAccumulateInfo& acc_info) override {
218    if (!in_kernel) {
219      return nullptr;
220    }
221    const Symbol* symbol = thread_tree_->FindKernelSymbol(ip);
222    return InsertCallChainSample(
223        std::unique_ptr<SlabSample>(
224            new SlabSample(symbol, sample->ptr, acc_info.bytes_req,
225                           acc_info.bytes_alloc, 1, sample->gfp_flags, 0)),
226        callchain);
227  }
228
229  const ThreadEntry* GetThreadOfSample(SlabSample*) override { return nullptr; }
230
231  uint64_t GetPeriodForCallChain(const SlabAccumulateInfo&) override {
232    // Decide the percentage of callchain by the sample_count, so use 1 as the
233    // period when calling AddCallChain().
234    return 1;
235  }
236
237  void UpdateSummary(const SlabSample* sample) override {
238    total_requested_bytes_ += sample->bytes_req;
239    total_allocated_bytes_ += sample->bytes_alloc;
240    nr_allocations_++;
241  }
242
243  void MergeSample(SlabSample* sample1, SlabSample* sample2) override {
244    sample1->bytes_req += sample2->bytes_req;
245    sample1->bytes_alloc += sample2->bytes_alloc;
246    sample1->sample_count += sample2->sample_count;
247  }
248
249 private:
250  ThreadTree* thread_tree_;
251  uint64_t total_requested_bytes_;
252  uint64_t total_allocated_bytes_;
253  uint64_t nr_allocations_;
254  uint64_t nr_frees_;
255  uint64_t nr_cross_cpu_allocations_;
256
257  std::unordered_map<uint64_t, SlabFormat*> event_id_to_format_map_;
258  std::vector<std::unique_ptr<SlabFormat>> formats_;
259  std::unordered_map<uint64_t, std::pair<uint32_t, SlabSample*>>
260      alloc_cpu_record_map_;
261};
262
// Shorthands for the sample tree helper templates instantiated for slab
// samples.
using SlabSampleTreeSorter = SampleTreeSorter<SlabSample>;
using SlabSampleTreeDisplayer = SampleTreeDisplayer<SlabSample, SlabSampleTree>;
using SlabSampleCallgraphDisplayer =
    CallgraphDisplayer<SlabSample, CallChainNode<SlabSample>>;
267
// An event attr read from the record file, together with its display name and
// the event ids that belong to it.
struct EventAttrWithName {
  perf_event_attr attr;
  // Initially from GetEventNameByAttr(); replaced with the name from the
  // tracing data for tracepoint events (see ProcessTracingData()).
  std::string name;
  std::vector<uint64_t> event_ids;
};
273
274class KmemCommand : public Command {
275 public:
276  KmemCommand()
277      : Command(
278            "kmem", "collect kernel memory allocation information",
279            // clang-format off
280"Usage: kmem (record [record options] | report [report options])\n"
281"kmem record\n"
282"-g        Enable call graph recording. Same as '--call-graph fp'.\n"
283"--slab    Collect slab allocation information. Default option.\n"
284"Other record options provided by simpleperf record command are also available.\n"
285"kmem report\n"
286"--children  Print the accumulated allocation info appeared in the callchain.\n"
287"            Can be used on perf.data recorded with `--call-graph fp` option.\n"
288"-g [callee|caller]  Print call graph for perf.data recorded with\n"
289"                    `--call-graph fp` option. If callee mode is used, the graph\n"
290"                     shows how functions are called from others. Otherwise, the\n"
291"                     graph shows how functions call others. Default is callee\n"
292"                     mode. The percentage shown in the graph is determined by\n"
293"                     the hit count of the callchain.\n"
294"-i          Specify path of record file, default is perf.data\n"
295"-o report_file_name  Set report file name, default is stdout.\n"
296"--slab      Report slab allocation information. Default option.\n"
297"--slab-sort key1,key2,...\n"
298"            Select the keys to sort and print slab allocation information.\n"
299"            Should be used with --slab option. Possible keys include:\n"
300"              hit         -- the allocation count.\n"
301"              caller      -- the function calling allocation.\n"
302"              ptr         -- the address of the allocated space.\n"
303"              bytes_req   -- the total requested space size.\n"
304"              bytes_alloc -- the total allocated space size.\n"
305"              fragment    -- the extra allocated space size\n"
306"                             (bytes_alloc - bytes_req).\n"
307"              gfp_flags   -- the flags used for allocation.\n"
308"              pingpong    -- the count of allocations that are freed not on\n"
309"                             the cpu allocating them.\n"
310"            The default slab sort keys are:\n"
311"              hit,caller,bytes_req,bytes_alloc,fragment,pingpong.\n"
312            // clang-format on
313            ),
314        is_record_(false),
315        use_slab_(false),
316        accumulate_callchain_(false),
317        print_callgraph_(false),
318        callgraph_show_callee_(false),
319        record_filename_("perf.data"),
320        record_file_arch_(GetBuildArch()) {}
321
322  bool Run(const std::vector<std::string>& args);
323
324 private:
325  bool ParseOptions(const std::vector<std::string>& args,
326                    std::vector<std::string>* left_args);
327  bool RecordKmemInfo(const std::vector<std::string>& record_args);
328  bool ReportKmemInfo();
329  bool PrepareToBuildSampleTree();
330  void ReadEventAttrsFromRecordFile();
331  bool ReadFeaturesFromRecordFile();
332  bool ReadSampleTreeFromRecordFile();
333  bool ProcessRecord(std::unique_ptr<Record> record);
334  void ProcessTracingData(const std::vector<char>& data);
335  bool PrintReport();
336  void PrintReportContext(FILE* fp);
337  void PrintSlabReportContext(FILE* fp);
338
339  bool is_record_;
340  bool use_slab_;
341  std::vector<std::string> slab_sort_keys_;
342  bool accumulate_callchain_;
343  bool print_callgraph_;
344  bool callgraph_show_callee_;
345
346  std::string record_filename_;
347  std::unique_ptr<RecordFileReader> record_file_reader_;
348  std::vector<EventAttrWithName> event_attrs_;
349  std::string record_cmdline_;
350  ArchType record_file_arch_;
351
352  ThreadTree thread_tree_;
353  SlabSampleTree slab_sample_tree_;
354  std::unique_ptr<SlabSampleTreeBuilder> slab_sample_tree_builder_;
355  std::unique_ptr<SlabSampleTreeSorter> slab_sample_tree_sorter_;
356  std::unique_ptr<SlabSampleTreeDisplayer> slab_sample_tree_displayer_;
357
358  std::string report_filename_;
359};
360
361bool KmemCommand::Run(const std::vector<std::string>& args) {
362  std::vector<std::string> left_args;
363  if (!ParseOptions(args, &left_args)) {
364    return false;
365  }
366  if (!use_slab_) {
367    use_slab_ = true;
368  }
369  if (is_record_) {
370    return RecordKmemInfo(left_args);
371  }
372  return ReportKmemInfo();
373}
374
375bool KmemCommand::ParseOptions(const std::vector<std::string>& args,
376                               std::vector<std::string>* left_args) {
377  if (args.empty()) {
378    LOG(ERROR) << "No subcommand specified";
379    return false;
380  }
381  if (args[0] == "record") {
382    if (!IsRoot()) {
383      LOG(ERROR) << "simpleperf kmem record command needs root privilege";
384      return false;
385    }
386    is_record_ = true;
387    size_t i;
388    for (i = 1; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
389      if (args[i] == "-g") {
390        left_args->push_back("--call-graph");
391        left_args->push_back("fp");
392      } else if (args[i] == "--slab") {
393        use_slab_ = true;
394      } else {
395        left_args->push_back(args[i]);
396      }
397    }
398    left_args->insert(left_args->end(), args.begin() + i, args.end());
399  } else if (args[0] == "report") {
400    is_record_ = false;
401    for (size_t i = 1; i < args.size(); ++i) {
402      if (args[i] == "--children") {
403        accumulate_callchain_ = true;
404      } else if (args[i] == "-g") {
405        print_callgraph_ = true;
406        accumulate_callchain_ = true;
407        callgraph_show_callee_ = true;
408        if (i + 1 < args.size() && args[i + 1][0] != '-') {
409          ++i;
410          if (args[i] == "callee") {
411            callgraph_show_callee_ = true;
412          } else if (args[i] == "caller") {
413            callgraph_show_callee_ = false;
414          } else {
415            LOG(ERROR) << "Unknown argument with -g option: " << args[i];
416            return false;
417          }
418        }
419      } else if (args[i] == "-i") {
420        if (!NextArgumentOrError(args, &i)) {
421          return false;
422        }
423        record_filename_ = args[i];
424      } else if (args[i] == "-o") {
425        if (!NextArgumentOrError(args, &i)) {
426          return false;
427        }
428        report_filename_ = args[i];
429      } else if (args[i] == "--slab") {
430        use_slab_ = true;
431      } else if (args[i] == "--slab-sort") {
432        if (!NextArgumentOrError(args, &i)) {
433          return false;
434        }
435        slab_sort_keys_ = android::base::Split(args[i], ",");
436      } else {
437        ReportUnknownOption(args, i);
438        return false;
439      }
440    }
441  } else {
442    LOG(ERROR) << "Unknown subcommand for " << Name() << ": " << args[0]
443               << ". Try `simpleperf help " << Name() << "`";
444    return false;
445  }
446  return true;
447}
448
449bool KmemCommand::RecordKmemInfo(const std::vector<std::string>& record_args) {
450  std::vector<std::string> args;
451  if (use_slab_) {
452    std::vector<std::string> trace_events = {
453        "kmem:kmalloc",      "kmem:kmem_cache_alloc",
454        "kmem:kmalloc_node", "kmem:kmem_cache_alloc_node",
455        "kmem:kfree",        "kmem:kmem_cache_free"};
456    for (const auto& name : trace_events) {
457      if (ParseEventType(name)) {
458        args.insert(args.end(), {"-e", name});
459      }
460    }
461  }
462  if (args.empty()) {
463    LOG(ERROR) << "Kernel allocation related trace events are not supported.";
464    return false;
465  }
466  args.push_back("-a");
467  args.insert(args.end(), record_args.begin(), record_args.end());
468  std::unique_ptr<Command> record_cmd = CreateCommandInstance("record");
469  if (record_cmd == nullptr) {
470    LOG(ERROR) << "record command isn't available";
471    return false;
472  }
473  return record_cmd->Run(args);
474}
475
476bool KmemCommand::ReportKmemInfo() {
477  if (!PrepareToBuildSampleTree()) {
478    return false;
479  }
480  record_file_reader_ = RecordFileReader::CreateInstance(record_filename_);
481  if (record_file_reader_ == nullptr) {
482    return false;
483  }
484  ReadEventAttrsFromRecordFile();
485  if (!ReadFeaturesFromRecordFile()) {
486    return false;
487  }
488  if (!ReadSampleTreeFromRecordFile()) {
489    return false;
490  }
491  if (!PrintReport()) {
492    return false;
493  }
494  return true;
495}
496
497bool KmemCommand::PrepareToBuildSampleTree() {
498  if (use_slab_) {
499    if (slab_sort_keys_.empty()) {
500      slab_sort_keys_ = {"hit",         "caller",   "bytes_req",
501                         "bytes_alloc", "fragment", "pingpong"};
502    }
503    SampleComparator<SlabSample> comparator;
504    SampleComparator<SlabSample> sort_comparator;
505    SampleDisplayer<SlabSample, SlabSampleTree> displayer;
506    std::string accumulated_name = accumulate_callchain_ ? "Accumulated_" : "";
507
508    if (print_callgraph_) {
509      displayer.AddExclusiveDisplayFunction(SlabSampleCallgraphDisplayer());
510    }
511
512    for (const auto& key : slab_sort_keys_) {
513      if (key == "hit") {
514        sort_comparator.AddCompareFunction(CompareSampleCount);
515        displayer.AddDisplayFunction(accumulated_name + "Hit",
516                                     DisplaySampleCount);
517      } else if (key == "caller") {
518        comparator.AddCompareFunction(CompareSymbol);
519        displayer.AddDisplayFunction("Caller", DisplaySymbol);
520      } else if (key == "ptr") {
521        comparator.AddCompareFunction(ComparePtr);
522        displayer.AddDisplayFunction("Ptr", DisplayPtr);
523      } else if (key == "bytes_req") {
524        sort_comparator.AddCompareFunction(CompareBytesReq);
525        displayer.AddDisplayFunction(accumulated_name + "BytesReq",
526                                     DisplayBytesReq);
527      } else if (key == "bytes_alloc") {
528        sort_comparator.AddCompareFunction(CompareBytesAlloc);
529        displayer.AddDisplayFunction(accumulated_name + "BytesAlloc",
530                                     DisplayBytesAlloc);
531      } else if (key == "fragment") {
532        sort_comparator.AddCompareFunction(CompareFragment);
533        displayer.AddDisplayFunction(accumulated_name + "Fragment",
534                                     DisplayFragment);
535      } else if (key == "gfp_flags") {
536        comparator.AddCompareFunction(CompareGfpFlags);
537        displayer.AddDisplayFunction("GfpFlags", DisplayGfpFlags);
538      } else if (key == "pingpong") {
539        sort_comparator.AddCompareFunction(CompareCrossCpuAllocations);
540        displayer.AddDisplayFunction("Pingpong", DisplayCrossCpuAllocations);
541      } else {
542        LOG(ERROR) << "Unknown sort key for slab allocation: " << key;
543        return false;
544      }
545      slab_sample_tree_builder_.reset(
546          new SlabSampleTreeBuilder(comparator, &thread_tree_));
547      slab_sample_tree_builder_->SetCallChainSampleOptions(
548          accumulate_callchain_, print_callgraph_, !callgraph_show_callee_,
549          false);
550      sort_comparator.AddComparator(comparator);
551      slab_sample_tree_sorter_.reset(new SlabSampleTreeSorter(sort_comparator));
552      slab_sample_tree_displayer_.reset(new SlabSampleTreeDisplayer(displayer));
553    }
554  }
555  return true;
556}
557
558void KmemCommand::ReadEventAttrsFromRecordFile() {
559  std::vector<EventAttrWithId> attrs = record_file_reader_->AttrSection();
560  for (const auto& attr_with_id : attrs) {
561    EventAttrWithName attr;
562    attr.attr = *attr_with_id.attr;
563    attr.event_ids = attr_with_id.ids;
564    attr.name = GetEventNameByAttr(attr.attr);
565    event_attrs_.push_back(attr);
566  }
567}
568
569bool KmemCommand::ReadFeaturesFromRecordFile() {
570  record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_);
571  std::string arch =
572      record_file_reader_->ReadFeatureString(PerfFileFormat::FEAT_ARCH);
573  if (!arch.empty()) {
574    record_file_arch_ = GetArchType(arch);
575    if (record_file_arch_ == ARCH_UNSUPPORTED) {
576      return false;
577    }
578  }
579  std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature();
580  if (!cmdline.empty()) {
581    record_cmdline_ = android::base::Join(cmdline, ' ');
582  }
583  if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_TRACING_DATA)) {
584    std::vector<char> tracing_data;
585    if (!record_file_reader_->ReadFeatureSection(
586            PerfFileFormat::FEAT_TRACING_DATA, &tracing_data)) {
587      return false;
588    }
589    ProcessTracingData(tracing_data);
590  }
591  return true;
592}
593
594bool KmemCommand::ReadSampleTreeFromRecordFile() {
595  if (!record_file_reader_->ReadDataSection(
596          [this](std::unique_ptr<Record> record) {
597            return ProcessRecord(std::move(record));
598          })) {
599    return false;
600  }
601  if (use_slab_) {
602    slab_sample_tree_ = slab_sample_tree_builder_->GetSampleTree();
603    slab_sample_tree_sorter_->Sort(slab_sample_tree_.samples, print_callgraph_);
604  }
605  return true;
606}
607
608bool KmemCommand::ProcessRecord(std::unique_ptr<Record> record) {
609  thread_tree_.Update(*record);
610  if (record->type() == PERF_RECORD_SAMPLE) {
611    if (use_slab_) {
612      slab_sample_tree_builder_->ProcessSampleRecord(
613          *static_cast<const SampleRecord*>(record.get()));
614    }
615  } else if (record->type() == PERF_RECORD_TRACING_DATA) {
616    const auto& r = *static_cast<TracingDataRecord*>(record.get());
617    ProcessTracingData(std::vector<char>(r.data, r.data + r.data_size));
618  }
619  return true;
620}
621
622void KmemCommand::ProcessTracingData(const std::vector<char>& data) {
623  Tracing tracing(data);
624  for (auto& attr : event_attrs_) {
625    if (attr.attr.type == PERF_TYPE_TRACEPOINT) {
626      uint64_t trace_event_id = attr.attr.config;
627      attr.name = tracing.GetTracingEventNameHavingId(trace_event_id);
628      TracingFormat format = tracing.GetTracingFormatHavingId(trace_event_id);
629      if (use_slab_) {
630        if (format.name == "kmalloc" || format.name == "kmem_cache_alloc" ||
631            format.name == "kmalloc_node" ||
632            format.name == "kmem_cache_alloc_node") {
633          SlabFormat f;
634          f.type = SlabFormat::KMEM_ALLOC;
635          format.GetField("call_site", f.call_site);
636          format.GetField("ptr", f.ptr);
637          format.GetField("bytes_req", f.bytes_req);
638          format.GetField("bytes_alloc", f.bytes_alloc);
639          format.GetField("gfp_flags", f.gfp_flags);
640          slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f);
641        } else if (format.name == "kfree" || format.name == "kmem_cache_free") {
642          SlabFormat f;
643          f.type = SlabFormat::KMEM_FREE;
644          format.GetField("call_site", f.call_site);
645          format.GetField("ptr", f.ptr);
646          slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f);
647        }
648      }
649    }
650  }
651}
652
653bool KmemCommand::PrintReport() {
654  std::unique_ptr<FILE, decltype(&fclose)> file_handler(nullptr, fclose);
655  FILE* report_fp = stdout;
656  if (!report_filename_.empty()) {
657    file_handler.reset(fopen(report_filename_.c_str(), "w"));
658    if (file_handler == nullptr) {
659      PLOG(ERROR) << "failed to open " << report_filename_;
660      return false;
661    }
662    report_fp = file_handler.get();
663  }
664  PrintReportContext(report_fp);
665  if (use_slab_) {
666    fprintf(report_fp, "\n\n");
667    PrintSlabReportContext(report_fp);
668    slab_sample_tree_displayer_->DisplaySamples(
669        report_fp, slab_sample_tree_.samples, &slab_sample_tree_);
670  }
671  return true;
672}
673
674void KmemCommand::PrintReportContext(FILE* fp) {
675  if (!record_cmdline_.empty()) {
676    fprintf(fp, "Cmdline: %s\n", record_cmdline_.c_str());
677  }
678  fprintf(fp, "Arch: %s\n", GetArchString(record_file_arch_).c_str());
679  for (const auto& attr : event_attrs_) {
680    fprintf(fp, "Event: %s (type %u, config %llu)\n", attr.name.c_str(),
681            attr.attr.type, attr.attr.config);
682  }
683}
684
685void KmemCommand::PrintSlabReportContext(FILE* fp) {
686  fprintf(fp, "Slab allocation information:\n");
687  fprintf(fp, "Total requested bytes: %" PRIu64 "\n",
688          slab_sample_tree_.total_requested_bytes);
689  fprintf(fp, "Total allocated bytes: %" PRIu64 "\n",
690          slab_sample_tree_.total_allocated_bytes);
691  uint64_t fragment = slab_sample_tree_.total_allocated_bytes -
692                      slab_sample_tree_.total_requested_bytes;
693  double percentage = 0.0;
694  if (slab_sample_tree_.total_allocated_bytes != 0) {
695    percentage = 100.0 * fragment / slab_sample_tree_.total_allocated_bytes;
696  }
697  fprintf(fp, "Total fragment: %" PRIu64 ", %f%%\n", fragment, percentage);
698  fprintf(fp, "Total allocations: %" PRIu64 "\n",
699          slab_sample_tree_.nr_allocations);
700  fprintf(fp, "Total frees: %" PRIu64 "\n", slab_sample_tree_.nr_frees);
701  percentage = 0.0;
702  if (slab_sample_tree_.nr_allocations != 0) {
703    percentage = 100.0 * slab_sample_tree_.nr_cross_cpu_allocations /
704                 slab_sample_tree_.nr_allocations;
705  }
706  fprintf(fp, "Total cross cpu allocation/free: %" PRIu64 ", %f%%\n",
707          slab_sample_tree_.nr_cross_cpu_allocations, percentage);
708  fprintf(fp, "\n");
709}
710
711}  // namespace
712
713void RegisterKmemCommand() {
714  RegisterCommand("kmem",
715                  [] { return std::unique_ptr<Command>(new KmemCommand()); });
716}
717