sample_tree.h revision 81a9d33dd0d753e4d4915dfb6f453b916be08813
1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef SIMPLE_PERF_SAMPLE_TREE_H_
18#define SIMPLE_PERF_SAMPLE_TREE_H_
19
20#include <unordered_map>
21
22#include "callchain.h"
23#include "dwarf_unwind.h"
24#include "perf_regs.h"
25#include "record.h"
26#include "SampleComparator.h"
27#include "SampleDisplayer.h"
28#include "thread_tree.h"
29
30// A SampleTree is a collection of samples. A profiling report is mainly about
31// constructing a SampleTree and display it. There are three steps involved:
32// build the tree, sort the tree, and display it. For example, if we want to
33// show how many cpu-cycles are spent in different functions, we should do as
34// follows:
35// 1. Build a SampleTree from SampleRecords with each sample containing
36//    (cpu-cycles, function name). When building the tree, we should merge
37//    samples containing the same function name.
38// 2. Sort the SampleTree by cpu-cycles in the sample. As we want to display the
39//    samples in a decreasing order of cpu-cycles, we should sort it like this.
40// 3. Display the SampleTree, each sample prints its (cpu-cycles, function name)
41//    pair.
42//
43// We represent the three steps with three template classes.
44// 1. A SampleTree is built by SampleTreeBuilder. The comparator passed in
45//    SampleTreeBuilder's constructor decides the property of samples should be
46//    merged together.
47// 2. After a SampleTree is built and got from SampleTreeBuilder, it should be
48//    sorted by SampleTreeSorter. The sort result decides the order to show
49//    samples.
50// 3. At last, the sorted SampleTree is passed to SampleTreeDisplayer, which
51//    displays each sample in the SampleTree.
52
53template <typename EntryT, typename AccumulateInfoT>
54class SampleTreeBuilder {
55 public:
56  explicit SampleTreeBuilder(const SampleComparator<EntryT>& comparator)
57      : sample_set_(comparator),
58        accumulate_callchain_(false),
59        sample_comparator_(comparator),
60        callchain_sample_set_(comparator),
61        use_branch_address_(false),
62        build_callchain_(false),
63        use_caller_as_callchain_root_(false),
64        strict_unwind_arch_check_(false) {}
65
66  virtual ~SampleTreeBuilder() {}
67
68  void SetBranchSampleOption(bool use_branch_address) {
69    use_branch_address_ = use_branch_address;
70  }
71
72  void SetCallChainSampleOptions(bool accumulate_callchain,
73                                 bool build_callchain,
74                                 bool use_caller_as_callchain_root,
75                                 bool strict_unwind_arch_check) {
76    accumulate_callchain_ = accumulate_callchain;
77    build_callchain_ = build_callchain;
78    use_caller_as_callchain_root_ = use_caller_as_callchain_root;
79    strict_unwind_arch_check_ = strict_unwind_arch_check;
80  }
81
82  void ProcessSampleRecord(const SampleRecord& r) {
83    if (use_branch_address_ && (r.sample_type & PERF_SAMPLE_BRANCH_STACK)) {
84      for (uint64_t i = 0; i < r.branch_stack_data.stack_nr; ++i) {
85        auto& item = r.branch_stack_data.stack[i];
86        if (item.from != 0 && item.to != 0) {
87          CreateBranchSample(r, item);
88        }
89      }
90      return;
91    }
92    bool in_kernel = r.InKernel();
93    AccumulateInfoT acc_info;
94    EntryT* sample = CreateSample(r, in_kernel, &acc_info);
95    if (sample == nullptr) {
96      return;
97    }
98    if (accumulate_callchain_) {
99      std::vector<uint64_t> ips;
100      if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
101        ips.insert(ips.end(), r.callchain_data.ips,
102                   r.callchain_data.ips + r.callchain_data.ip_nr);
103      }
104      const ThreadEntry* thread = GetThreadOfSample(sample);
105      // Use stack_user_data.data.size() instead of stack_user_data.dyn_size, to
106      // make up for the missing kernel patch in N9. See b/22612370.
107      if (thread != nullptr && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
108          (r.regs_user_data.reg_mask != 0) &&
109          (r.sample_type & PERF_SAMPLE_STACK_USER) &&
110          (r.GetValidStackSize() > 0)) {
111        RegSet regs = CreateRegSet(r.regs_user_data.abi,
112                                   r.regs_user_data.reg_mask,
113                                   r.regs_user_data.regs);
114        std::vector<uint64_t> user_ips;
115        std::vector<uint64_t> sps;
116        if (UnwindCallChain(r.regs_user_data.abi, *thread, regs, r.stack_user_data.data,
117                            r.GetValidStackSize(), strict_unwind_arch_check_, &user_ips, &sps)) {
118          ips.push_back(PERF_CONTEXT_USER);
119          ips.insert(ips.end(), user_ips.begin(), user_ips.end());
120        }
121      }
122
123      std::vector<EntryT*> callchain;
124      callchain.push_back(sample);
125
126      bool first_ip = true;
127      for (auto& ip : ips) {
128        if (ip >= PERF_CONTEXT_MAX) {
129          switch (ip) {
130            case PERF_CONTEXT_KERNEL:
131              in_kernel = true;
132              break;
133            case PERF_CONTEXT_USER:
134              in_kernel = false;
135              break;
136            default:
137              LOG(DEBUG) << "Unexpected perf_context in callchain: " << ip;
138          }
139        } else {
140          if (first_ip) {
141            first_ip = false;
142            // Remove duplication with sampled ip.
143            if (ip == r.ip_data.ip) {
144              continue;
145            }
146          }
147          EntryT* callchain_sample =
148              CreateCallChainSample(sample, ip, in_kernel, callchain, acc_info);
149          if (callchain_sample == nullptr) {
150            break;
151          }
152          callchain.push_back(callchain_sample);
153        }
154      }
155
156      if (build_callchain_) {
157        std::set<EntryT*> added_set;
158        if (use_caller_as_callchain_root_) {
159          std::reverse(callchain.begin(), callchain.end());
160        }
161        EntryT* parent = nullptr;
162        while (callchain.size() >= 2) {
163          EntryT* sample = callchain[0];
164          callchain.erase(callchain.begin());
165          // Add only once for recursive calls on callchain.
166          if (added_set.find(sample) != added_set.end()) {
167            continue;
168          }
169          added_set.insert(sample);
170          InsertCallChainForSample(sample, callchain, acc_info);
171          UpdateCallChainParentInfo(sample, parent);
172          parent = sample;
173        }
174      }
175    }
176  }
177
178  std::vector<EntryT*> GetSamples() const {
179    std::vector<EntryT*> result;
180    for (auto& entry : sample_set_) {
181      result.push_back(entry);
182    }
183    return result;
184  }
185
186 protected:
187  virtual EntryT* CreateSample(const SampleRecord& r, bool in_kernel,
188                               AccumulateInfoT* acc_info) = 0;
189  virtual EntryT* CreateBranchSample(const SampleRecord& r,
190                                     const BranchStackItemType& item) = 0;
191  virtual EntryT* CreateCallChainSample(const EntryT* sample, uint64_t ip,
192                                        bool in_kernel,
193                                        const std::vector<EntryT*>& callchain,
194                                        const AccumulateInfoT& acc_info) = 0;
195  virtual const ThreadEntry* GetThreadOfSample(EntryT*) = 0;
196  virtual uint64_t GetPeriodForCallChain(const AccumulateInfoT& acc_info) = 0;
197  virtual bool FilterSample(const EntryT*) { return true; }
198
199  virtual void UpdateSummary(const EntryT*) {}
200
201  virtual void MergeSample(EntryT* sample1, EntryT* sample2) = 0;
202
203  EntryT* InsertSample(std::unique_ptr<EntryT> sample) {
204    if (sample == nullptr || !FilterSample(sample.get())) {
205      return nullptr;
206    }
207    UpdateSummary(sample.get());
208    EntryT* result;
209    auto it = sample_set_.find(sample.get());
210    if (it == sample_set_.end()) {
211      result = sample.get();
212      sample_set_.insert(sample.get());
213      sample_storage_.push_back(std::move(sample));
214    } else {
215      result = *it;
216      MergeSample(*it, sample.get());
217    }
218    return result;
219  }
220
221  EntryT* InsertCallChainSample(std::unique_ptr<EntryT> sample,
222                                const std::vector<EntryT*>& callchain) {
223    if (sample == nullptr) {
224      return nullptr;
225    }
226    if (!FilterSample(sample.get())) {
227      // Store in callchain_sample_set_ for use in other EntryT's callchain.
228      auto it = callchain_sample_set_.find(sample.get());
229      if (it != callchain_sample_set_.end()) {
230        return *it;
231      }
232      EntryT* result = sample.get();
233      callchain_sample_set_.insert(sample.get());
234      sample_storage_.push_back(std::move(sample));
235      return result;
236    }
237
238    auto it = sample_set_.find(sample.get());
239    if (it != sample_set_.end()) {
240      EntryT* sample = *it;
241      // Process only once for recursive function call.
242      if (std::find(callchain.begin(), callchain.end(), sample) !=
243          callchain.end()) {
244        return sample;
245      }
246    }
247    return InsertSample(std::move(sample));
248  }
249
250  void InsertCallChainForSample(EntryT* sample,
251                                const std::vector<EntryT*>& callchain,
252                                const AccumulateInfoT& acc_info) {
253    uint64_t period = GetPeriodForCallChain(acc_info);
254    sample->callchain.AddCallChain(
255        callchain, period, [&](const EntryT* s1, const EntryT* s2) {
256          return sample_comparator_.IsSameSample(s1, s2);
257        });
258  }
259
260  void AddCallChainDuplicateInfo() {
261    if (build_callchain_) {
262      for (EntryT* sample : sample_set_) {
263        auto it = callchain_parent_map_.find(sample);
264        if (it != callchain_parent_map_.end() && !it->second.has_multiple_parents) {
265          sample->callchain.duplicated = true;
266        }
267      }
268    }
269  }
270
271  std::set<EntryT*, SampleComparator<EntryT>> sample_set_;
272  bool accumulate_callchain_;
273
274 private:
275  void UpdateCallChainParentInfo(EntryT* sample, EntryT* parent) {
276    if (parent == nullptr) {
277      return;
278    }
279    auto it = callchain_parent_map_.find(sample);
280    if (it == callchain_parent_map_.end()) {
281      CallChainParentInfo info;
282      info.parent = parent;
283      info.has_multiple_parents = false;
284      callchain_parent_map_[sample] = info;
285    } else if (it->second.parent != parent) {
286      it->second.has_multiple_parents = true;
287    }
288  }
289
290  const SampleComparator<EntryT> sample_comparator_;
291  // If a CallChainSample is filtered out, it is stored in callchain_sample_set_
292  // and only used in other EntryT's callchain.
293  std::set<EntryT*, SampleComparator<EntryT>> callchain_sample_set_;
294  std::vector<std::unique_ptr<EntryT>> sample_storage_;
295
296  struct CallChainParentInfo {
297    EntryT* parent;
298    bool has_multiple_parents;
299  };
300  std::unordered_map<EntryT*, CallChainParentInfo> callchain_parent_map_;
301
302  bool use_branch_address_;
303  bool build_callchain_;
304  bool use_caller_as_callchain_root_;
305  bool strict_unwind_arch_check_;
306};
307
308template <typename EntryT>
309class SampleTreeSorter {
310 public:
311  explicit SampleTreeSorter(SampleComparator<EntryT> comparator)
312      : comparator_(comparator) {}
313
314  virtual ~SampleTreeSorter() {}
315
316  void Sort(std::vector<EntryT*>& v, bool sort_callchain) {
317    if (sort_callchain) {
318      for (auto& sample : v) {
319        SortCallChain(sample);
320      }
321    }
322    if (!comparator_.empty()) {
323      std::sort(v.begin(), v.end(), [this](const EntryT* s1, const EntryT* s2) {
324        return comparator_(s1, s2);
325      });
326    }
327  }
328
329 protected:
330  void SortCallChain(EntryT* sample) { sample->callchain.SortByPeriod(); }
331
332 private:
333  SampleComparator<EntryT> comparator_;
334};
335
336template <typename EntryT, typename InfoT>
337class SampleTreeDisplayer {
338 public:
339  explicit SampleTreeDisplayer(SampleDisplayer<EntryT, InfoT> displayer)
340      : displayer_(displayer) {}
341
342  virtual ~SampleTreeDisplayer() {}
343
344  void DisplaySamples(FILE* fp, const std::vector<EntryT*>& samples,
345                      const InfoT* info) {
346    displayer_.SetInfo(info);
347    for (const auto& sample : samples) {
348      displayer_.AdjustWidth(sample);
349    }
350    displayer_.PrintNames(fp);
351    for (const auto& sample : samples) {
352      displayer_.PrintSample(fp, sample);
353    }
354  }
355
356 private:
357  SampleDisplayer<EntryT, InfoT> displayer_;
358};
359
360#endif  // SIMPLE_PERF_SAMPLE_TREE_H_
361