1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef SIMPLE_PERF_SAMPLE_TREE_H_
18#define SIMPLE_PERF_SAMPLE_TREE_H_
19
20#include <unordered_map>
21
22#include "callchain.h"
23#include "dwarf_unwind.h"
24#include "perf_regs.h"
25#include "record.h"
26#include "SampleComparator.h"
27#include "SampleDisplayer.h"
28#include "thread_tree.h"
29
30// A SampleTree is a collection of samples. A profiling report is mainly about
31// constructing a SampleTree and display it. There are three steps involved:
32// build the tree, sort the tree, and display it. For example, if we want to
33// show how many cpu-cycles are spent in different functions, we should do as
34// follows:
35// 1. Build a SampleTree from SampleRecords with each sample containing
36//    (cpu-cycles, function name). When building the tree, we should merge
37//    samples containing the same function name.
38// 2. Sort the SampleTree by cpu-cycles in the sample. As we want to display the
39//    samples in a decreasing order of cpu-cycles, we should sort it like this.
40// 3. Display the SampleTree, each sample prints its (cpu-cycles, function name)
41//    pair.
42//
43// We represent the three steps with three template classes.
44// 1. A SampleTree is built by SampleTreeBuilder. The comparator passed in
45//    SampleTreeBuilder's constructor decides the property of samples should be
46//    merged together.
47// 2. After a SampleTree is built and got from SampleTreeBuilder, it should be
48//    sorted by SampleTreeSorter. The sort result decides the order to show
49//    samples.
50// 3. At last, the sorted SampleTree is passed to SampleTreeDisplayer, which
51//    displays each sample in the SampleTree.
52
53template <typename EntryT, typename AccumulateInfoT>
54class SampleTreeBuilder {
55 public:
56  explicit SampleTreeBuilder(SampleComparator<EntryT> comparator)
57      : sample_set_(comparator),
58        accumulate_callchain_(false),
59        sample_comparator_(comparator),
60        callchain_sample_set_(comparator),
61        use_branch_address_(false),
62        build_callchain_(false),
63        use_caller_as_callchain_root_(false),
64        strict_unwind_arch_check_(false) {}
65
66  virtual ~SampleTreeBuilder() {}
67
68  void SetBranchSampleOption(bool use_branch_address) {
69    use_branch_address_ = use_branch_address;
70  }
71
72  void SetCallChainSampleOptions(bool accumulate_callchain,
73                                 bool build_callchain,
74                                 bool use_caller_as_callchain_root,
75                                 bool strict_unwind_arch_check) {
76    accumulate_callchain_ = accumulate_callchain;
77    build_callchain_ = build_callchain;
78    use_caller_as_callchain_root_ = use_caller_as_callchain_root;
79    strict_unwind_arch_check_ = strict_unwind_arch_check;
80  }
81
82  void ProcessSampleRecord(const SampleRecord& r) {
83    if (use_branch_address_ && (r.sample_type & PERF_SAMPLE_BRANCH_STACK)) {
84      for (uint64_t i = 0; i < r.branch_stack_data.stack_nr; ++i) {
85        auto& item = r.branch_stack_data.stack[i];
86        if (item.from != 0 && item.to != 0) {
87          CreateBranchSample(r, item);
88        }
89      }
90      return;
91    }
92    bool in_kernel = r.InKernel();
93    AccumulateInfoT acc_info;
94    EntryT* sample = CreateSample(r, in_kernel, &acc_info);
95    if (sample == nullptr) {
96      return;
97    }
98    if (accumulate_callchain_) {
99      std::vector<uint64_t> ips;
100      if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
101        ips.insert(ips.end(), r.callchain_data.ips,
102                   r.callchain_data.ips + r.callchain_data.ip_nr);
103      }
104      const ThreadEntry* thread = GetThreadOfSample(sample);
105      // Use stack_user_data.data.size() instead of stack_user_data.dyn_size, to
106      // make up for the missing kernel patch in N9. See b/22612370.
107      if (thread != nullptr && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
108          (r.regs_user_data.reg_mask != 0) &&
109          (r.sample_type & PERF_SAMPLE_STACK_USER) &&
110          (r.GetValidStackSize() > 0)) {
111        RegSet regs = CreateRegSet(r.regs_user_data.abi,
112                                   r.regs_user_data.reg_mask,
113                                   r.regs_user_data.regs);
114        std::vector<uint64_t> unwind_ips =
115            UnwindCallChain(r.regs_user_data.abi, *thread, regs,
116                            r.stack_user_data.data,
117                            r.GetValidStackSize(), strict_unwind_arch_check_);
118        if (!unwind_ips.empty()) {
119          ips.push_back(PERF_CONTEXT_USER);
120          ips.insert(ips.end(), unwind_ips.begin(), unwind_ips.end());
121        }
122      }
123
124      std::vector<EntryT*> callchain;
125      callchain.push_back(sample);
126
127      bool first_ip = true;
128      for (auto& ip : ips) {
129        if (ip >= PERF_CONTEXT_MAX) {
130          switch (ip) {
131            case PERF_CONTEXT_KERNEL:
132              in_kernel = true;
133              break;
134            case PERF_CONTEXT_USER:
135              in_kernel = false;
136              break;
137            default:
138              LOG(DEBUG) << "Unexpected perf_context in callchain: " << ip;
139          }
140        } else {
141          if (first_ip) {
142            first_ip = false;
143            // Remove duplication with sampled ip.
144            if (ip == r.ip_data.ip) {
145              continue;
146            }
147          }
148          EntryT* callchain_sample =
149              CreateCallChainSample(sample, ip, in_kernel, callchain, acc_info);
150          if (callchain_sample == nullptr) {
151            break;
152          }
153          callchain.push_back(callchain_sample);
154        }
155      }
156
157      if (build_callchain_) {
158        std::set<EntryT*> added_set;
159        if (use_caller_as_callchain_root_) {
160          std::reverse(callchain.begin(), callchain.end());
161        }
162        EntryT* parent = nullptr;
163        while (callchain.size() >= 2) {
164          EntryT* sample = callchain[0];
165          callchain.erase(callchain.begin());
166          // Add only once for recursive calls on callchain.
167          if (added_set.find(sample) != added_set.end()) {
168            continue;
169          }
170          added_set.insert(sample);
171          InsertCallChainForSample(sample, callchain, acc_info);
172          UpdateCallChainParentInfo(sample, parent);
173          parent = sample;
174        }
175      }
176    }
177  }
178
179  std::vector<EntryT*> GetSamples() const {
180    std::vector<EntryT*> result;
181    for (auto& entry : sample_set_) {
182      result.push_back(entry);
183    }
184    return result;
185  }
186
187 protected:
188  virtual EntryT* CreateSample(const SampleRecord& r, bool in_kernel,
189                               AccumulateInfoT* acc_info) = 0;
190  virtual EntryT* CreateBranchSample(const SampleRecord& r,
191                                     const BranchStackItemType& item) = 0;
192  virtual EntryT* CreateCallChainSample(const EntryT* sample, uint64_t ip,
193                                        bool in_kernel,
194                                        const std::vector<EntryT*>& callchain,
195                                        const AccumulateInfoT& acc_info) = 0;
196  virtual const ThreadEntry* GetThreadOfSample(EntryT*) = 0;
197  virtual uint64_t GetPeriodForCallChain(const AccumulateInfoT& acc_info) = 0;
198  virtual bool FilterSample(const EntryT*) { return true; }
199
200  virtual void UpdateSummary(const EntryT*) {}
201
202  virtual void MergeSample(EntryT* sample1, EntryT* sample2) = 0;
203
204  EntryT* InsertSample(std::unique_ptr<EntryT> sample) {
205    if (sample == nullptr || !FilterSample(sample.get())) {
206      return nullptr;
207    }
208    UpdateSummary(sample.get());
209    EntryT* result;
210    auto it = sample_set_.find(sample.get());
211    if (it == sample_set_.end()) {
212      result = sample.get();
213      sample_set_.insert(sample.get());
214      sample_storage_.push_back(std::move(sample));
215    } else {
216      result = *it;
217      MergeSample(*it, sample.get());
218    }
219    return result;
220  }
221
222  EntryT* InsertCallChainSample(std::unique_ptr<EntryT> sample,
223                                const std::vector<EntryT*>& callchain) {
224    if (sample == nullptr) {
225      return nullptr;
226    }
227    if (!FilterSample(sample.get())) {
228      // Store in callchain_sample_set_ for use in other EntryT's callchain.
229      auto it = callchain_sample_set_.find(sample.get());
230      if (it != callchain_sample_set_.end()) {
231        return *it;
232      }
233      EntryT* result = sample.get();
234      callchain_sample_set_.insert(sample.get());
235      sample_storage_.push_back(std::move(sample));
236      return result;
237    }
238
239    auto it = sample_set_.find(sample.get());
240    if (it != sample_set_.end()) {
241      EntryT* sample = *it;
242      // Process only once for recursive function call.
243      if (std::find(callchain.begin(), callchain.end(), sample) !=
244          callchain.end()) {
245        return sample;
246      }
247    }
248    return InsertSample(std::move(sample));
249  }
250
251  void InsertCallChainForSample(EntryT* sample,
252                                const std::vector<EntryT*>& callchain,
253                                const AccumulateInfoT& acc_info) {
254    uint64_t period = GetPeriodForCallChain(acc_info);
255    sample->callchain.AddCallChain(
256        callchain, period, [&](const EntryT* s1, const EntryT* s2) {
257          return sample_comparator_.IsSameSample(s1, s2);
258        });
259  }
260
261  void AddCallChainDuplicateInfo() {
262    if (build_callchain_) {
263      for (EntryT* sample : sample_set_) {
264        auto it = callchain_parent_map_.find(sample);
265        if (it != callchain_parent_map_.end() && !it->second.has_multiple_parents) {
266          sample->callchain.duplicated = true;
267        }
268      }
269    }
270  }
271
272  std::set<EntryT*, SampleComparator<EntryT>> sample_set_;
273  bool accumulate_callchain_;
274
275 private:
276  void UpdateCallChainParentInfo(EntryT* sample, EntryT* parent) {
277    if (parent == nullptr) {
278      return;
279    }
280    auto it = callchain_parent_map_.find(sample);
281    if (it == callchain_parent_map_.end()) {
282      CallChainParentInfo info;
283      info.parent = parent;
284      info.has_multiple_parents = false;
285      callchain_parent_map_[sample] = info;
286    } else if (it->second.parent != parent) {
287      it->second.has_multiple_parents = true;
288    }
289  }
290
291  const SampleComparator<EntryT> sample_comparator_;
292  // If a CallChainSample is filtered out, it is stored in callchain_sample_set_
293  // and only used in other EntryT's callchain.
294  std::set<EntryT*, SampleComparator<EntryT>> callchain_sample_set_;
295  std::vector<std::unique_ptr<EntryT>> sample_storage_;
296
297  struct CallChainParentInfo {
298    EntryT* parent;
299    bool has_multiple_parents;
300  };
301  std::unordered_map<EntryT*, CallChainParentInfo> callchain_parent_map_;
302
303  bool use_branch_address_;
304  bool build_callchain_;
305  bool use_caller_as_callchain_root_;
306  bool strict_unwind_arch_check_;
307};
308
309template <typename EntryT>
310class SampleTreeSorter {
311 public:
312  explicit SampleTreeSorter(SampleComparator<EntryT> comparator)
313      : comparator_(comparator) {}
314
315  virtual ~SampleTreeSorter() {}
316
317  void Sort(std::vector<EntryT*>& v, bool sort_callchain) {
318    if (sort_callchain) {
319      for (auto& sample : v) {
320        SortCallChain(sample);
321      }
322    }
323    if (!comparator_.empty()) {
324      std::sort(v.begin(), v.end(), [this](const EntryT* s1, const EntryT* s2) {
325        return comparator_(s1, s2);
326      });
327    }
328  }
329
330 protected:
331  void SortCallChain(EntryT* sample) { sample->callchain.SortByPeriod(); }
332
333 private:
334  SampleComparator<EntryT> comparator_;
335};
336
337template <typename EntryT, typename InfoT>
338class SampleTreeDisplayer {
339 public:
340  explicit SampleTreeDisplayer(SampleDisplayer<EntryT, InfoT> displayer)
341      : displayer_(displayer) {}
342
343  virtual ~SampleTreeDisplayer() {}
344
345  void DisplaySamples(FILE* fp, const std::vector<EntryT*>& samples,
346                      const InfoT* info) {
347    displayer_.SetInfo(info);
348    for (const auto& sample : samples) {
349      displayer_.AdjustWidth(sample);
350    }
351    displayer_.PrintNames(fp);
352    for (const auto& sample : samples) {
353      displayer_.PrintSample(fp, sample);
354    }
355  }
356
357 private:
358  SampleDisplayer<EntryT, InfoT> displayer_;
359};
360
361#endif  // SIMPLE_PERF_SAMPLE_TREE_H_
362