1// Copyright (c) 2008, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31// All Rights Reserved.
32//
33// Author: Daniel Ford
34
35#ifndef TCMALLOC_SAMPLER_H_
36#define TCMALLOC_SAMPLER_H_
37
38#include "config.h"
39#include <stddef.h>                     // for size_t
40#ifdef HAVE_STDINT_H
41#include <stdint.h>                     // for uint64_t, uint32_t, int32_t
42#endif
43#include <string.h>                     // for memcpy
44#include "base/basictypes.h"  // for ASSERT
45#include "internal_logging.h"  // for ASSERT
46
47namespace tcmalloc {
48
49//-------------------------------------------------------------------
50// Sampler to decide when to create a sample trace for an allocation
// Not thread safe: Each thread should have its own sampler object.
52// Caller must use external synchronization if used
53// from multiple threads.
54//
55// With 512K average sample step (the default):
56//  the probability of sampling a 4K allocation is about 0.00778
57//  the probability of sampling a 1MB allocation is about 0.865
58//  the probability of sampling a 1GB allocation is about 1.00000
// In general, the probability of sampling an allocation of size X
// given a flag value of Y (default 512K) is:
61//  1 - e^(-X/Y)
62//
63// With 128K average sample step:
64//  the probability of sampling a 1MB allocation is about 0.99966
65//  the probability of sampling a 1GB allocation is about 1.0
66//  (about 1 - 2**(-26))
67// With 1M average sample step:
68//  the probability of sampling a 4K allocation is about 0.00390
69//  the probability of sampling a 1MB allocation is about 0.632
70//  the probability of sampling a 1GB allocation is about 1.0
71//
72// The sampler works by representing memory as a long stream from
73// which allocations are taken. Some of the bytes in this stream are
74// marked and if an allocation includes a marked byte then it is
75// sampled. Bytes are marked according to a Poisson point process
76// with each byte being marked independently with probability
77// p = 1/tcmalloc_sample_parameter.  This makes the probability
78// of sampling an allocation of X bytes equal to the CDF of
79// a geometric with mean tcmalloc_sample_parameter. (ie. the
80// probability that at least one byte in the range is marked). This
81// is accurately given by the CDF of the corresponding exponential
// distribution : 1 - e^(-X/tcmalloc_sample_parameter)
83// Independence of the byte marking ensures independence of
84// the sampling of each allocation.
85//
86// This scheme is implemented by noting that, starting from any
87// fixed place, the number of bytes until the next marked byte
88// is geometrically distributed. This number is recorded as
// bytes_until_sample_.  Every allocation subtracts its size from this
// number until an allocation is larger than what remains. When this
// happens the current allocation is sampled.
92//
// When an allocation is sampled, bytes_until_sample_ is reset to
// a new independently sampled geometric number of bytes. The
95// memoryless property of the point process means that this may
96// be taken as the number of bytes after the end of the current
97// allocation until the next marked byte. This ensures that
98// very large allocations which would intersect many marked bytes
99// only result in a single call to PickNextSamplingPoint.
100//-------------------------------------------------------------------
101
102class PERFTOOLS_DLL_DECL Sampler {
103 public:
104  // Initialize this sampler.
105  // Passing a seed of 0 gives a non-deterministic
106  // seed value given by casting the object ("this")
107  void Init(uint32_t seed);
108  void Cleanup();
109
110  // Record allocation of "k" bytes.  Return true iff allocation
111  // should be sampled
112  bool SampleAllocation(size_t k);
113
114  // Generate a geometric with mean 512K (or FLAG_tcmalloc_sample_parameter)
115  size_t PickNextSamplingPoint();
116
117  // Initialize the statics for the Sampler class
118  static void InitStatics();
119
120  // Returns the current sample period
121  int GetSamplePeriod();
122
123  // The following are public for the purposes of testing
124  static uint64_t NextRandom(uint64_t rnd_);  // Returns the next prng value
125  static double FastLog2(const double & d);  // Computes Log2(x) quickly
126  static void PopulateFastLog2Table();  // Populate the lookup table
127
128 private:
129  size_t        bytes_until_sample_;    // Bytes until we sample next
130  uint64_t      rnd_;                   // Cheap random number generator
131
132  // Statics for the fast log
133  // Note that this code may not depend on anything in //util
134  // hence the duplication of functionality here
135  static const int kFastlogNumBits = 10;
136  static const int kFastlogMask = (1 << kFastlogNumBits) - 1;
137  static double log_table_[1<<kFastlogNumBits];  // Constant
138};
139
140inline bool Sampler::SampleAllocation(size_t k) {
141  if (bytes_until_sample_ < k) {
142    bytes_until_sample_ = PickNextSamplingPoint();
143    return true;
144  } else {
145    bytes_until_sample_ -= k;
146    return false;
147  }
148}
149
150// Inline functions which are public for testing purposes
151
152// Returns the next prng value.
153// pRNG is: aX+b mod c with a = 0x5DEECE66D, b =  0xB, c = 1<<48
154// This is the lrand64 generator.
155inline uint64_t Sampler::NextRandom(uint64_t rnd) {
156  const uint64_t prng_mult = 0x5DEECE66DLL;
157  const uint64_t prng_add = 0xB;
158  const uint64_t prng_mod_power = 48;
159  const uint64_t prng_mod_mask =
160                ~((~static_cast<uint64_t>(0)) << prng_mod_power);
161  return (prng_mult * rnd + prng_add) & prng_mod_mask;
162}
163
164// Adapted from //util/math/fastmath.[h|cc] by Noam Shazeer
165// This mimics the VeryFastLog2 code in those files
166inline double Sampler::FastLog2(const double & d) {
167  ASSERT(d>0);
168  COMPILE_ASSERT(sizeof(d) == sizeof(uint64_t), DoubleMustBe64Bits);
169  uint64_t x;
170  memcpy(&x, &d, sizeof(x));   // we depend on the compiler inlining this
171  const uint32_t x_high = x >> 32;
172  const uint32_t y = x_high >> (20 - kFastlogNumBits) & kFastlogMask;
173  const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023;
174  return exponent + log_table_[y];
175}
176
177}  // namespace tcmalloc
178
179#endif  // TCMALLOC_SAMPLER_H_
180