15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2008, Google Inc.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// All rights reserved.
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Redistribution and use in source and binary forms, with or without
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// modification, are permitted provided that the following conditions are
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// met:
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     * Redistributions of source code must retain the above copyright
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// notice, this list of conditions and the following disclaimer.
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     * Redistributions in binary form must reproduce the above
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// copyright notice, this list of conditions and the following disclaimer
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// in the documentation and/or other materials provided with the
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// distribution.
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     * Neither the name of Google Inc. nor the names of its
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// contributors may be used to endorse or promote products derived from
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// this software without specific prior written permission.
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ---
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// All Rights Reserved.
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Author: Daniel Ford
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef TCMALLOC_SAMPLER_H_
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define TCMALLOC_SAMPLER_H_
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "config.h"
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stddef.h>                     // for size_t
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef HAVE_STDINT_H
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stdint.h>                     // for uint64_t, uint32_t, int32_t
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string.h>                     // for memcpy
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h"  // for ASSERT
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "internal_logging.h"  // for ASSERT
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace tcmalloc {
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//-------------------------------------------------------------------
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Sampler to decide when to create a sample trace for an allocation
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Not thread safe: Each thread should have it's own sampler object.
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Caller must use external synchronization if used
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// from multiple threads.
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// With 512K average sample step (the default):
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//  the probability of sampling a 4K allocation is about 0.00778
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//  the probability of sampling a 1MB allocation is about 0.865
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//  the probability of sampling a 1GB allocation is about 1.00000
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// In general, the probablity of sampling is an allocation of size X
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// given a flag value of Y (default 1M) is:
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//  1 - e^(-X/Y)
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// With 128K average sample step:
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//  the probability of sampling a 1MB allocation is about 0.99966
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//  the probability of sampling a 1GB allocation is about 1.0
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//  (about 1 - 2**(-26))
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// With 1M average sample step:
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//  the probability of sampling a 4K allocation is about 0.00390
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//  the probability of sampling a 1MB allocation is about 0.632
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//  the probability of sampling a 1GB allocation is about 1.0
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The sampler works by representing memory as a long stream from
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// which allocations are taken. Some of the bytes in this stream are
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// marked and if an allocation includes a marked byte then it is
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// sampled. Bytes are marked according to a Poisson point process
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// with each byte being marked independently with probability
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// p = 1/tcmalloc_sample_parameter.  This makes the probability
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// of sampling an allocation of X bytes equal to the CDF of
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// a geometric with mean tcmalloc_sample_parameter. (ie. the
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// probability that at least one byte in the range is marked). This
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// is accurately given by the CDF of the corresponding exponential
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// distribution : 1 - e^(X/tcmalloc_sample_parameter_)
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Independence of the byte marking ensures independence of
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the sampling of each allocation.
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This scheme is implemented by noting that, starting from any
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// fixed place, the number of bytes until the next marked byte
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// is geometrically distributed. This number is recorded as
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// bytes_until_sample_.  Every allocation subtracts from this
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// number until it is less than 0. When this happens the current
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// allocation is sampled.
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// When an allocation occurs, bytes_until_sample_ is reset to
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// a new independtly sampled geometric number of bytes. The
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// memoryless property of the point process means that this may
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// be taken as the number of bytes after the end of the current
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// allocation until the next marked byte. This ensures that
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// very large allocations which would intersect many marked bytes
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// only result in a single call to PickNextSamplingPoint.
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//-------------------------------------------------------------------
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class PERFTOOLS_DLL_DECL Sampler {
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Initialize this sampler.
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Passing a seed of 0 gives a non-deterministic
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // seed value given by casting the object ("this")
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Init(uint32_t seed);
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Cleanup();
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Record allocation of "k" bytes.  Return true iff allocation
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // should be sampled
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool SampleAllocation(size_t k);
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Generate a geometric with mean 512K (or FLAG_tcmalloc_sample_parameter)
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t PickNextSamplingPoint();
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Initialize the statics for the Sampler class
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static void InitStatics();
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the current sample period
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int GetSamplePeriod();
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The following are public for the purposes of testing
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static uint64_t NextRandom(uint64_t rnd_);  // Returns the next prng value
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static double FastLog2(const double & d);  // Computes Log2(x) quickly
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static void PopulateFastLog2Table();  // Populate the lookup table
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t        bytes_until_sample_;    // Bytes until we sample next
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint64_t      rnd_;                   // Cheap random number generator
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Statics for the fast log
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Note that this code may not depend on anything in //util
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // hence the duplication of functionality here
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const int kFastlogNumBits = 10;
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const int kFastlogMask = (1 << kFastlogNumBits) - 1;
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static double log_table_[1<<kFastlogNumBits];  // Constant
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline bool Sampler::SampleAllocation(size_t k) {
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (bytes_until_sample_ < k) {
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bytes_until_sample_ = PickNextSamplingPoint();
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bytes_until_sample_ -= k;
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Inline functions which are public for testing purposes
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns the next prng value.
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// pRNG is: aX+b mod c with a = 0x5DEECE66D, b =  0xB, c = 1<<48
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This is the lrand64 generator.
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline uint64_t Sampler::NextRandom(uint64_t rnd) {
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const uint64_t prng_mult = 0x5DEECE66DLL;
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const uint64_t prng_add = 0xB;
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const uint64_t prng_mod_power = 48;
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const uint64_t prng_mod_mask =
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                ~((~static_cast<uint64_t>(0)) << prng_mod_power);
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return (prng_mult * rnd + prng_add) & prng_mod_mask;
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Adapted from //util/math/fastmath.[h|cc] by Noam Shazeer
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This mimics the VeryFastLog2 code in those files
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)inline double Sampler::FastLog2(const double & d) {
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT(d>0);
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  COMPILE_ASSERT(sizeof(d) == sizeof(uint64_t), DoubleMustBe64Bits);
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint64_t x;
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  memcpy(&x, &d, sizeof(x));   // we depend on the compiler inlining this
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const uint32_t x_high = x >> 32;
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const uint32_t y = x_high >> (20 - kFastlogNumBits) & kFastlogMask;
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023;
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return exponent + log_table_[y];
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace tcmalloc
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // TCMALLOC_SAMPLER_H_
180