1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Histogram is an object that aggregates statistics, and can summarize them in
6// various forms, including ASCII graphical, HTML, and numerically (as a
7// vector of numbers corresponding to each of the aggregating buckets).
8
9// It supports calls to accumulate either time intervals (which are processed
10// as integral number of milliseconds), or arbitrary integral units.
11
12// For Histogram (exponential histogram), LinearHistogram and CustomHistogram,
13// the minimum for a declared range is 1 (instead of 0), while the maximum is
14// (HistogramBase::kSampleType_MAX - 1). However, there will always be underflow
15// and overflow buckets added automatically, so a 0 bucket will always exist
16// even when a minimum value of 1 is specified.
17
18// Each use of a histogram with the same name will reference the same underlying
19// data, so it is safe to record to the same histogram from multiple locations
20// in the code. It is a runtime error if all uses of the same histogram do not
21// agree exactly in type, bucket size and range.
22
// For Histogram and LinearHistogram, the maximum for a declared range should
// always be strictly larger than the minimum. Zero and
// HistogramBase::kSampleType_MAX are implicitly added as first and last ranges,
// so the smallest legal bucket_count is 3. However, CustomHistogram can have
// a bucket count of 2 (when you give a custom ranges vector containing only 1
// range).
// For these 3 kinds of histograms, the max bucket count is always
// (Histogram::kBucketCount_MAX - 1).
31
// The buckets layout of class Histogram is exponential. For example, buckets
// might contain (sequentially) the count of values in the following intervals:
// [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity)
// That bucket allocation would actually result from construction of a histogram
// for values between 1 and 64, with 8 buckets, such as:
// Histogram count("some name", 1, 64, 8);
// Note that the underflow bucket [0,1) and the overflow bucket [64,infinity)
// are also counted by the constructor in the user supplied "bucket_count"
// argument.
// The above example has an exponential ratio of 2 (doubling the bucket width
// in each consecutive bucket).  The Histogram class automatically calculates
// the smallest ratio that it can use to construct the number of buckets
// selected in the constructor.  As another example, if you had 50 buckets,
// and millisecond time values from 1 to 10000, then the ratio between
// consecutive bucket widths will be approximately the 50th root of 10000.
// This approach provides very fine grain (narrow) buckets at the low end of
// the histogram scale, but allows the histogram to cover a gigantic range with
// the addition of very few buckets.
50
// Usually we use macros to define and use a histogram, which are defined in
// base/metrics/histogram_macros.h. Note: Callers should include that header
// directly if they only access the histogram APIs through macros.
//
// Macros use a pattern involving a function static variable, that is a pointer
// to a histogram.  This static is explicitly initialized on any thread
// that detects an uninitialized (NULL) pointer.  The potentially racy
// initialization is not a problem as it is always set to point to the same
// value (i.e., the FactoryGet always returns the same value).  FactoryGet
// is also completely thread safe, which results in a completely thread safe,
// and relatively fast, set of counters.  To avoid races at shutdown, the static
// pointer is NOT deleted, and we leak the histograms at process termination.
63
64#ifndef BASE_METRICS_HISTOGRAM_H_
65#define BASE_METRICS_HISTOGRAM_H_
66
67#include <stddef.h>
68#include <stdint.h>
69
70#include <map>
71#include <memory>
72#include <string>
73#include <vector>
74
75#include "base/base_export.h"
76#include "base/compiler_specific.h"
77#include "base/gtest_prod_util.h"
78#include "base/logging.h"
79#include "base/macros.h"
80#include "base/metrics/bucket_ranges.h"
81#include "base/metrics/histogram_base.h"
82#include "base/metrics/histogram_samples.h"
83#include "base/time/time.h"
84
85namespace base {
86
// Forward declarations of the histogram classes defined below and of the
// helper types that are only used here by pointer or inside smart pointers.
class BooleanHistogram;
class CustomHistogram;
class Histogram;
class LinearHistogram;
class Pickle;
class PickleIterator;
class SampleVector;
94
95class BASE_EXPORT Histogram : public HistogramBase {
96 public:
97  // Initialize maximum number of buckets in histograms as 16,384.
98  static const uint32_t kBucketCount_MAX;
99
100  typedef std::vector<Count> Counts;
101
102  ~Histogram() override;
103
104  //----------------------------------------------------------------------------
105  // For a valid histogram, input should follow these restrictions:
106  // minimum > 0 (if a minimum below 1 is specified, it will implicitly be
107  //              normalized up to 1)
108  // maximum > minimum
109  // buckets > 2 [minimum buckets needed: underflow, overflow and the range]
110  // Additionally,
111  // buckets <= (maximum - minimum + 2) - this is to ensure that we don't have
112  // more buckets than the range of numbers; having more buckets than 1 per
113  // value in the range would be nonsensical.
114  static HistogramBase* FactoryGet(const std::string& name,
115                                   Sample minimum,
116                                   Sample maximum,
117                                   uint32_t bucket_count,
118                                   int32_t flags);
119  static HistogramBase* FactoryTimeGet(const std::string& name,
120                                       base::TimeDelta minimum,
121                                       base::TimeDelta maximum,
122                                       uint32_t bucket_count,
123                                       int32_t flags);
124
125  // Overloads of the above two functions that take a const char* |name| param,
126  // to avoid code bloat from the std::string constructor being inlined into
127  // call sites.
128  static HistogramBase* FactoryGet(const char* name,
129                                   Sample minimum,
130                                   Sample maximum,
131                                   uint32_t bucket_count,
132                                   int32_t flags);
133  static HistogramBase* FactoryTimeGet(const char* name,
134                                       base::TimeDelta minimum,
135                                       base::TimeDelta maximum,
136                                       uint32_t bucket_count,
137                                       int32_t flags);
138
139  // Create a histogram using data in persistent storage.
140  static std::unique_ptr<HistogramBase> PersistentCreate(
141      const std::string& name,
142      Sample minimum,
143      Sample maximum,
144      const BucketRanges* ranges,
145      HistogramBase::AtomicCount* counts,
146      HistogramBase::AtomicCount* logged_counts,
147      uint32_t counts_size,
148      HistogramSamples::Metadata* meta,
149      HistogramSamples::Metadata* logged_meta);
150
151  static void InitializeBucketRanges(Sample minimum,
152                                     Sample maximum,
153                                     BucketRanges* ranges);
154
155  // This constant if for FindCorruption. Since snapshots of histograms are
156  // taken asynchronously relative to sampling, and our counting code currently
157  // does not prevent race conditions, it is pretty likely that we'll catch a
158  // redundant count that doesn't match the sample count.  We allow for a
159  // certain amount of slop before flagging this as an inconsistency. Even with
160  // an inconsistency, we'll snapshot it again (for UMA in about a half hour),
161  // so we'll eventually get the data, if it was not the result of a corruption.
162  static const int kCommonRaceBasedCountMismatch;
163
164  // Check to see if bucket ranges, counts and tallies in the snapshot are
165  // consistent with the bucket ranges and checksums in our histogram.  This can
166  // produce a false-alarm if a race occurred in the reading of the data during
167  // a SnapShot process, but should otherwise be false at all times (unless we
168  // have memory over-writes, or DRAM failures). Flag definitions are located
169  // under "enum Inconsistency" in base/metrics/histogram_base.h.
170  uint32_t FindCorruption(const HistogramSamples& samples) const override;
171
172  //----------------------------------------------------------------------------
173  // Accessors for factory construction, serialization and testing.
174  //----------------------------------------------------------------------------
175  Sample declared_min() const { return declared_min_; }
176  Sample declared_max() const { return declared_max_; }
177  virtual Sample ranges(uint32_t i) const;
178  virtual uint32_t bucket_count() const;
179  const BucketRanges* bucket_ranges() const { return bucket_ranges_; }
180
181  // This function validates histogram construction arguments. It returns false
182  // if some of the arguments are totally bad.
183  // Note. Currently it allow some bad input, e.g. 0 as minimum, but silently
184  // converts it to good input: 1.
185  // TODO(kaiwang): Be more restrict and return false for any bad input, and
186  // make this a readonly validating function.
187  static bool InspectConstructionArguments(const std::string& name,
188                                           Sample* minimum,
189                                           Sample* maximum,
190                                           uint32_t* bucket_count);
191
192  // HistogramBase implementation:
193  uint64_t name_hash() const override;
194  HistogramType GetHistogramType() const override;
195  bool HasConstructionArguments(Sample expected_minimum,
196                                Sample expected_maximum,
197                                uint32_t expected_bucket_count) const override;
198  void Add(Sample value) override;
199  void AddCount(Sample value, int count) override;
200  std::unique_ptr<HistogramSamples> SnapshotSamples() const override;
201  std::unique_ptr<HistogramSamples> SnapshotDelta() override;
202  std::unique_ptr<HistogramSamples> SnapshotFinalDelta() const override;
203  void AddSamples(const HistogramSamples& samples) override;
204  bool AddSamplesFromPickle(base::PickleIterator* iter) override;
205  void WriteHTMLGraph(std::string* output) const override;
206  void WriteAscii(std::string* output) const override;
207
208 protected:
209  // This class, defined entirely within the .cc file, contains all the
210  // common logic for building a Histogram and can be overridden by more
211  // specific types to alter details of how the creation is done. It is
212  // defined as an embedded class (rather than an anonymous one) so it
213  // can access the protected constructors.
214  class Factory;
215
216  // |ranges| should contain the underflow and overflow buckets. See top
217  // comments for example.
218  Histogram(const std::string& name,
219            Sample minimum,
220            Sample maximum,
221            const BucketRanges* ranges);
222
223  // Traditionally, histograms allocate their own memory for the bucket
224  // vector but "shared" histograms use memory regions allocated from a
225  // special memory segment that is passed in here.  It is assumed that
226  // the life of this memory is managed externally and exceeds the lifetime
227  // of this object. Practically, this memory is never released until the
228  // process exits and the OS cleans it up.
229  Histogram(const std::string& name,
230            Sample minimum,
231            Sample maximum,
232            const BucketRanges* ranges,
233            HistogramBase::AtomicCount* counts,
234            HistogramBase::AtomicCount* logged_counts,
235            uint32_t counts_size,
236            HistogramSamples::Metadata* meta,
237            HistogramSamples::Metadata* logged_meta);
238
239  // HistogramBase implementation:
240  bool SerializeInfoImpl(base::Pickle* pickle) const override;
241
242  // Method to override to skip the display of the i'th bucket if it's empty.
243  virtual bool PrintEmptyBucket(uint32_t index) const;
244
245  // Get normalized size, relative to the ranges(i).
246  virtual double GetBucketSize(Count current, uint32_t i) const;
247
248  // Return a string description of what goes in a given bucket.
249  // Most commonly this is the numeric value, but in derived classes it may
250  // be a name (or string description) given to the bucket.
251  virtual const std::string GetAsciiBucketRange(uint32_t it) const;
252
253 private:
254  // Allow tests to corrupt our innards for testing purposes.
255  FRIEND_TEST_ALL_PREFIXES(HistogramTest, BoundsTest);
256  FRIEND_TEST_ALL_PREFIXES(HistogramTest, BucketPlacementTest);
257  FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptSampleCounts);
258
259  friend class StatisticsRecorder;  // To allow it to delete duplicates.
260  friend class StatisticsRecorderTest;
261
262  friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
263      base::PickleIterator* iter);
264  static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
265
266  // Implementation of SnapshotSamples function.
267  std::unique_ptr<SampleVector> SnapshotSampleVector() const;
268
269  //----------------------------------------------------------------------------
270  // Helpers for emitting Ascii graphic.  Each method appends data to output.
271
272  void WriteAsciiImpl(bool graph_it,
273                      const std::string& newline,
274                      std::string* output) const;
275
276  // Find out how large (graphically) the largest bucket will appear to be.
277  double GetPeakBucketSize(const SampleVector& samples) const;
278
279  // Write a common header message describing this histogram.
280  void WriteAsciiHeader(const SampleVector& samples,
281                        Count sample_count,
282                        std::string* output) const;
283
284  // Write information about previous, current, and next buckets.
285  // Information such as cumulative percentage, etc.
286  void WriteAsciiBucketContext(const int64_t past,
287                               const Count current,
288                               const int64_t remaining,
289                               const uint32_t i,
290                               std::string* output) const;
291
292  // WriteJSON calls these.
293  void GetParameters(DictionaryValue* params) const override;
294
295  void GetCountAndBucketData(Count* count,
296                             int64_t* sum,
297                             ListValue* buckets) const override;
298
299  // Does not own this object. Should get from StatisticsRecorder.
300  const BucketRanges* bucket_ranges_;
301
302  Sample declared_min_;  // Less than this goes into the first bucket.
303  Sample declared_max_;  // Over this goes into the last bucket.
304
305  // Finally, provide the state that changes with the addition of each new
306  // sample.
307  std::unique_ptr<SampleVector> samples_;
308
309  // Also keep a previous uploaded state for calculating deltas.
310  std::unique_ptr<HistogramSamples> logged_samples_;
311
312  // Flag to indicate if PrepareFinalDelta has been previously called. It is
313  // used to DCHECK that a final delta is not created multiple times.
314  mutable bool final_delta_created_ = false;
315
316  DISALLOW_COPY_AND_ASSIGN(Histogram);
317};
318
319//------------------------------------------------------------------------------
320
321// LinearHistogram is a more traditional histogram, with evenly spaced
322// buckets.
323class BASE_EXPORT LinearHistogram : public Histogram {
324 public:
325  ~LinearHistogram() override;
326
327  /* minimum should start from 1. 0 is as minimum is invalid. 0 is an implicit
328     default underflow bucket. */
329  static HistogramBase* FactoryGet(const std::string& name,
330                                   Sample minimum,
331                                   Sample maximum,
332                                   uint32_t bucket_count,
333                                   int32_t flags);
334  static HistogramBase* FactoryTimeGet(const std::string& name,
335                                       TimeDelta minimum,
336                                       TimeDelta maximum,
337                                       uint32_t bucket_count,
338                                       int32_t flags);
339
340  // Overloads of the above two functions that take a const char* |name| param,
341  // to avoid code bloat from the std::string constructor being inlined into
342  // call sites.
343  static HistogramBase* FactoryGet(const char* name,
344                                   Sample minimum,
345                                   Sample maximum,
346                                   uint32_t bucket_count,
347                                   int32_t flags);
348  static HistogramBase* FactoryTimeGet(const char* name,
349                                       TimeDelta minimum,
350                                       TimeDelta maximum,
351                                       uint32_t bucket_count,
352                                       int32_t flags);
353
354  // Create a histogram using data in persistent storage.
355  static std::unique_ptr<HistogramBase> PersistentCreate(
356      const std::string& name,
357      Sample minimum,
358      Sample maximum,
359      const BucketRanges* ranges,
360      HistogramBase::AtomicCount* counts,
361      HistogramBase::AtomicCount* logged_counts,
362      uint32_t counts_size,
363      HistogramSamples::Metadata* meta,
364      HistogramSamples::Metadata* logged_meta);
365
366  struct DescriptionPair {
367    Sample sample;
368    const char* description;  // Null means end of a list of pairs.
369  };
370
371  // Create a LinearHistogram and store a list of number/text values for use in
372  // writing the histogram graph.
373  // |descriptions| can be NULL, which means no special descriptions to set. If
374  // it's not NULL, the last element in the array must has a NULL in its
375  // "description" field.
376  static HistogramBase* FactoryGetWithRangeDescription(
377      const std::string& name,
378      Sample minimum,
379      Sample maximum,
380      uint32_t bucket_count,
381      int32_t flags,
382      const DescriptionPair descriptions[]);
383
384  static void InitializeBucketRanges(Sample minimum,
385                                     Sample maximum,
386                                     BucketRanges* ranges);
387
388  // Overridden from Histogram:
389  HistogramType GetHistogramType() const override;
390
391 protected:
392  class Factory;
393
394  LinearHistogram(const std::string& name,
395                  Sample minimum,
396                  Sample maximum,
397                  const BucketRanges* ranges);
398
399  LinearHistogram(const std::string& name,
400                  Sample minimum,
401                  Sample maximum,
402                  const BucketRanges* ranges,
403                  HistogramBase::AtomicCount* counts,
404                  HistogramBase::AtomicCount* logged_counts,
405                  uint32_t counts_size,
406                  HistogramSamples::Metadata* meta,
407                  HistogramSamples::Metadata* logged_meta);
408
409  double GetBucketSize(Count current, uint32_t i) const override;
410
411  // If we have a description for a bucket, then return that.  Otherwise
412  // let parent class provide a (numeric) description.
413  const std::string GetAsciiBucketRange(uint32_t i) const override;
414
415  // Skip printing of name for numeric range if we have a name (and if this is
416  // an empty bucket).
417  bool PrintEmptyBucket(uint32_t index) const override;
418
419 private:
420  friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
421      base::PickleIterator* iter);
422  static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
423
424  // For some ranges, we store a printable description of a bucket range.
425  // If there is no description, then GetAsciiBucketRange() uses parent class
426  // to provide a description.
427  typedef std::map<Sample, std::string> BucketDescriptionMap;
428  BucketDescriptionMap bucket_description_;
429
430  DISALLOW_COPY_AND_ASSIGN(LinearHistogram);
431};
432
433//------------------------------------------------------------------------------
434
435// BooleanHistogram is a histogram for booleans.
436class BASE_EXPORT BooleanHistogram : public LinearHistogram {
437 public:
438  static HistogramBase* FactoryGet(const std::string& name, int32_t flags);
439
440  // Overload of the above function that takes a const char* |name| param,
441  // to avoid code bloat from the std::string constructor being inlined into
442  // call sites.
443  static HistogramBase* FactoryGet(const char* name, int32_t flags);
444
445  // Create a histogram using data in persistent storage.
446  static std::unique_ptr<HistogramBase> PersistentCreate(
447      const std::string& name,
448      const BucketRanges* ranges,
449      HistogramBase::AtomicCount* counts,
450      HistogramBase::AtomicCount* logged_counts,
451      HistogramSamples::Metadata* meta,
452      HistogramSamples::Metadata* logged_meta);
453
454  HistogramType GetHistogramType() const override;
455
456 protected:
457  class Factory;
458
459 private:
460  BooleanHistogram(const std::string& name, const BucketRanges* ranges);
461  BooleanHistogram(const std::string& name,
462                   const BucketRanges* ranges,
463                   HistogramBase::AtomicCount* counts,
464                   HistogramBase::AtomicCount* logged_counts,
465                   HistogramSamples::Metadata* meta,
466                   HistogramSamples::Metadata* logged_meta);
467
468  friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
469      base::PickleIterator* iter);
470  static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
471
472  DISALLOW_COPY_AND_ASSIGN(BooleanHistogram);
473};
474
475//------------------------------------------------------------------------------
476
477// CustomHistogram is a histogram for a set of custom integers.
478class BASE_EXPORT CustomHistogram : public Histogram {
479 public:
480  // |custom_ranges| contains a vector of limits on ranges. Each limit should be
481  // > 0 and < kSampleType_MAX. (Currently 0 is still accepted for backward
482  // compatibility). The limits can be unordered or contain duplication, but
483  // client should not depend on this.
484  static HistogramBase* FactoryGet(const std::string& name,
485                                   const std::vector<Sample>& custom_ranges,
486                                   int32_t flags);
487
488  // Overload of the above function that takes a const char* |name| param,
489  // to avoid code bloat from the std::string constructor being inlined into
490  // call sites.
491  static HistogramBase* FactoryGet(const char* name,
492                                   const std::vector<Sample>& custom_ranges,
493                                   int32_t flags);
494
495  // Create a histogram using data in persistent storage.
496  static std::unique_ptr<HistogramBase> PersistentCreate(
497      const std::string& name,
498      const BucketRanges* ranges,
499      HistogramBase::AtomicCount* counts,
500      HistogramBase::AtomicCount* logged_counts,
501      uint32_t counts_size,
502      HistogramSamples::Metadata* meta,
503      HistogramSamples::Metadata* logged_meta);
504
505  // Overridden from Histogram:
506  HistogramType GetHistogramType() const override;
507
508  // Helper method for transforming an array of valid enumeration values
509  // to the std::vector<int> expected by UMA_HISTOGRAM_CUSTOM_ENUMERATION.
510  // This function ensures that a guard bucket exists right after any
511  // valid sample value (unless the next higher sample is also a valid value),
512  // so that invalid samples never fall into the same bucket as valid samples.
513  // TODO(kaiwang): Change name to ArrayToCustomEnumRanges.
514  static std::vector<Sample> ArrayToCustomRanges(const Sample* values,
515                                                 uint32_t num_values);
516 protected:
517  class Factory;
518
519  CustomHistogram(const std::string& name,
520                  const BucketRanges* ranges);
521
522  CustomHistogram(const std::string& name,
523                  const BucketRanges* ranges,
524                  HistogramBase::AtomicCount* counts,
525                  HistogramBase::AtomicCount* logged_counts,
526                  uint32_t counts_size,
527                  HistogramSamples::Metadata* meta,
528                  HistogramSamples::Metadata* logged_meta);
529
530  // HistogramBase implementation:
531  bool SerializeInfoImpl(base::Pickle* pickle) const override;
532
533  double GetBucketSize(Count current, uint32_t i) const override;
534
535 private:
536  friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
537      base::PickleIterator* iter);
538  static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
539
540  static bool ValidateCustomRanges(const std::vector<Sample>& custom_ranges);
541
542  DISALLOW_COPY_AND_ASSIGN(CustomHistogram);
543};
544
545}  // namespace base
546
547#endif  // BASE_METRICS_HISTOGRAM_H_
548