1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#ifndef ART_RUNTIME_BASE_HISTOGRAM_H_
17#define ART_RUNTIME_BASE_HISTOGRAM_H_
18
19#include <vector>
20#include <string>
21
22#include "base/logging.h"
23
24namespace art {
25
26// Creates a data histogram  for a better understanding of statistical data.
27// Histogram analysis goes beyond simple mean and standard deviation to provide
28// percentiles values, describing where the $% of the input data lies.
29// Designed to be simple and used with timing logger in art.
30
31template <class Value> class Histogram {
32  const double kAdjust;
33  const size_t kInitialBucketCount;
34
35 public:
36  class CumulativeData {
37    friend class Histogram<Value>;
38    std::vector<uint64_t> freq_;
39    std::vector<double> perc_;
40  };
41
42  // Used by the cumulative timing logger to search the histogram set using for an existing split
43  // with the same name using CumulativeLogger::HistogramComparator.
44  explicit Histogram(const char* name);
45  // This is the expected constructor when creating new Histograms.
46  Histogram(const char* name, Value initial_bucket_width, size_t max_buckets = 100);
47  void AddValue(Value);
48  void AdjustAndAddValue(Value);  // Add a value after dividing it by kAdjust.
49  // Builds the cumulative distribution function from the frequency data.
50  // Accumulative summation of frequencies.
51  // cumulative_freq[i] = sum(frequency[j] : 0 < j < i )
52  // Accumulative summation of percentiles; which is the frequency / SampleSize
53  // cumulative_perc[i] = sum(frequency[j] / SampleSize : 0 < j < i )
54  void CreateHistogram(CumulativeData* data) const;
55  // Reset the cumulative values, next time CreateHistogram is called it will recreate the cache.
56  void Reset();
57  double Mean() const;
58  double Variance() const;
59  double Percentile(double per, const CumulativeData& data) const;
60  void PrintConfidenceIntervals(std::ostream& os, double interval,
61                                const CumulativeData& data) const;
62  void PrintMemoryUse(std::ostream& os) const;
63  void PrintBins(std::ostream& os, const CumulativeData& data) const;
64  void DumpBins(std::ostream& os) const;
65  Value GetRange(size_t bucket_idx) const;
66  size_t GetBucketCount() const;
67
68  uint64_t SampleSize() const {
69    return sample_size_;
70  }
71
72  Value Sum() const {
73    return sum_;
74  }
75
76  Value AdjustedSum() const {
77    return sum_ * kAdjust;
78  }
79
80  Value Min() const {
81    return min_value_added_;
82  }
83
84  Value Max() const {
85    return max_value_added_;
86  }
87
88  Value BucketWidth() const {
89    return bucket_width_;
90  }
91
92  const std::string& Name() const {
93    return name_;
94  }
95
96 private:
97  void Initialize();
98  size_t FindBucket(Value val) const;
99  void BucketiseValue(Value val);
100  // Add more buckets to the histogram to fill in a new value that exceeded
101  // the max_read_value_.
102  void GrowBuckets(Value val);
103  std::string name_;
104  // Maximum number of buckets.
105  const size_t max_buckets_;
106  // Number of samples placed in histogram.
107  size_t sample_size_;
108  // Width of the bucket range. The lower the value is the more accurate
109  // histogram percentiles are. Grows adaptively when we hit max buckets.
110  Value bucket_width_;
111  // How many occurrences of values fall within a bucket at index i where i covers the range
112  // starting at  min_ + i * bucket_width_ with size bucket_size_.
113  std::vector<uint32_t> frequency_;
114  // Summation of all the elements inputed by the user.
115  Value sum_;
116  // Minimum value that can fit in the histogram. Fixed to zero for now.
117  Value min_;
118  // Maximum value that can fit in the histogram, grows adaptively.
119  Value max_;
120  // Summation of the values entered. Used to calculate variance.
121  Value sum_of_squares_;
122  // Maximum value entered in the histogram.
123  Value min_value_added_;
124  // Minimum value entered in the histogram.
125  Value max_value_added_;
126
127  DISALLOW_COPY_AND_ASSIGN(Histogram);
128};
129}  // namespace art
130
131#endif  // ART_RUNTIME_BASE_HISTOGRAM_H_
132