1// Copyright (c) 2008, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31// Author: Sanjay Ghemawat <opensource@google.com>
32//
33// Common definitions for tcmalloc code.
34
35#ifndef TCMALLOC_COMMON_H_
36#define TCMALLOC_COMMON_H_
37
38#include "config.h"
39#include <stddef.h>                     // for size_t
40#ifdef HAVE_STDINT_H
41#include <stdint.h>                     // for uintptr_t, uint64_t
42#endif
43#include "internal_logging.h"  // for ASSERT, etc
44
// Integer types used for page bookkeeping.  Both are aliases of uintptr_t.
typedef uintptr_t PageID;   // holds a page number
typedef uintptr_t Length;   // holds the length of a run of pages
50
51//-------------------------------------------------------------------
52// Configuration
53//-------------------------------------------------------------------
54
// Using large pages speeds up the execution at a cost of larger memory use.
// Deallocation may speed up by a factor as the page map gets 4x smaller
// (kPageShift is 15 rather than 13), so lookups in the page map result in
// fewer L2 cache misses, which translates to speedup for
// application/platform combinations with high L2 cache pressure.
// As the number of size classes increases with large pages, we increase
// the thread cache allowance to avoid passing more free ranges to and from
// central lists.  Also, larger pages are less likely to get freed.
// These two factors cause a bounded increase in memory use.
// NOTE(review): with the constants below, large pages use fewer size classes
// (78 vs. 86) and kMaxThreadCacheSize is the same in both configurations, so
// the size-class/thread-cache sentence above may be out of date -- confirm.
63
// Page geometry and the number of size classes depend on whether the build
// defines TCMALLOC_LARGE_PAGES.
#ifdef TCMALLOC_LARGE_PAGES
static const size_t kPageShift  = 15;  // log2(page size): 32 KiB pages
static const size_t kNumClasses = 78;  // number of size classes
#else
static const size_t kPageShift  = 13;  // log2(page size): 8 KiB pages
static const size_t kNumClasses = 86;  // number of size classes
#endif
// 4 MiB, the same for both page configurations.
static const size_t kMaxThreadCacheSize = 4 * 1024 * 1024;

// Page size in bytes, i.e. 2^kPageShift.
static const size_t kPageSize   = static_cast<size_t>(1) << kPageShift;
// 256 KiB.  SizeMap::ClassIndex() asserts that its argument is <= kMaxSize.
static const size_t kMaxSize    = 256 << 10;
static const size_t kAlignment  = 8;
static const size_t kLargeSizeClass = 0;
// For all span-lengths < kMaxPages we keep an exact-size list.
// The value works out to the number of pages in 1 MiB (2^20 bytes).
static const size_t kMaxPages = static_cast<size_t>(1) << (20 - kPageShift);
79
80// Default bound on the total amount of thread caches.
81#ifdef TCMALLOC_SMALL_BUT_SLOW
82// Make the overall thread cache no bigger than that of a single thread
83// for the small memory footprint case.
84static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize;
85#else
86static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
87#endif
88
89// Lower bound on the per-thread cache sizes
90static const size_t kMinThreadCacheSize = kMaxSize * 2;
91
92// The number of bytes one ThreadCache will steal from another when
93// the first ThreadCache is forced to Scavenge(), delaying the
94// next call to Scavenge for this thread.
95static const size_t kStealAmount = 1 << 16;
96
97// The number of times that a deallocation can cause a freelist to
98// go over its max_length() before shrinking max_length().
99static const int kMaxOverages = 3;
100
101// Maximum length we allow a per-thread free-list to have before we
102// move objects from it into the corresponding central free-list.  We
103// want this big to avoid locking the central free-list too often.  It
104// should not hurt to make this list somewhat big because the
105// scavenging code will shrink it down when its contents are not in use.
106static const int kMaxDynamicFreeListLength = 8192;
107
108static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
109
#ifdef __x86_64__
// All current and planned x86_64 processors only look at the lower 48 bits
// in virtual to physical address translation, so the top 16 bits are unused.
// Limiting the address width lets us use smaller page maps.  On first
// allocation, a 36-bit page map uses only 96 KB instead of the 4.5 MB used
// by a 52-bit page map.
// TODO(rus): Under what operating systems can we increase it safely to 17?
//
// If pointers are narrower than 8 bytes, use the full pointer width instead.
static const int kAddressBits =
    (sizeof(void*) >= 8) ? 48 : (8 * sizeof(void*));
#else
// Every other target: use the full width of a pointer, in bits.
static const int kAddressBits = sizeof(void*) * 8;
#endif
120
121namespace tcmalloc {
122
123// Convert byte size into pages.  This won't overflow, but may return
124// an unreasonably large value if bytes is huge enough.
125inline Length pages(size_t bytes) {
126  return (bytes >> kPageShift) +
127      ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
128}
129
// For larger allocation sizes, we use larger memory alignments to
// reduce the number of size classes.
//
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably returns the alignment, in bytes, to use for an
// allocation of "size" bytes -- confirm against the definition.
int AlignmentForSize(size_t size);
133
134// Size-class information + mapping
135class SizeMap {
136 private:
137  // Number of objects to move between a per-thread list and a central
138  // list in one shot.  We want this to be not too small so we can
139  // amortize the lock overhead for accessing the central list.  Making
140  // it too big may temporarily cause unnecessary memory wastage in the
141  // per-thread free list until the scavenger cleans up the list.
142  int num_objects_to_move_[kNumClasses];
143
144  //-------------------------------------------------------------------
145  // Mapping from size to size_class and vice versa
146  //-------------------------------------------------------------------
147
148  // Sizes <= 1024 have an alignment >= 8.  So for such sizes we have an
149  // array indexed by ceil(size/8).  Sizes > 1024 have an alignment >= 128.
150  // So for these larger sizes we have an array indexed by ceil(size/128).
151  //
152  // We flatten both logical arrays into one physical array and use
153  // arithmetic to compute an appropriate index.  The constants used by
154  // ClassIndex() were selected to make the flattening work.
155  //
156  // Examples:
157  //   Size       Expression                      Index
158  //   -------------------------------------------------------
159  //   0          (0 + 7) / 8                     0
160  //   1          (1 + 7) / 8                     1
161  //   ...
162  //   1024       (1024 + 7) / 8                  128
163  //   1025       (1025 + 127 + (120<<7)) / 128   129
164  //   ...
165  //   32768      (32768 + 127 + (120<<7)) / 128  376
166  static const int kMaxSmallSize = 1024;
167  static const size_t kClassArraySize =
168      ((kMaxSize + 127 + (120 << 7)) >> 7) + 1;
169  unsigned char class_array_[kClassArraySize];
170
171  // Compute index of the class_array[] entry for a given size
172  static inline int ClassIndex(int s) {
173    ASSERT(0 <= s);
174    ASSERT(s <= kMaxSize);
175    const bool big = (s > kMaxSmallSize);
176    const int add_amount = big ? (127 + (120<<7)) : 7;
177    const int shift_amount = big ? 7 : 3;
178    return (s + add_amount) >> shift_amount;
179  }
180
181  int NumMoveSize(size_t size);
182
183  // Mapping from size class to max size storable in that class
184  size_t class_to_size_[kNumClasses];
185
186  // Mapping from size class to number of pages to allocate at a time
187  size_t class_to_pages_[kNumClasses];
188
189 public:
190  // Constructor should do nothing since we rely on explicit Init()
191  // call, which may or may not be called before the constructor runs.
192  SizeMap() { }
193
194  // Initialize the mapping arrays
195  void Init();
196
197  inline int SizeClass(int size) {
198    return class_array_[ClassIndex(size)];
199  }
200
201  // Get the byte-size for a specified class
202  inline size_t ByteSizeForClass(size_t cl) {
203    return class_to_size_[cl];
204  }
205
206  // Mapping from size class to max size storable in that class
207  inline size_t class_to_size(size_t cl) {
208    return class_to_size_[cl];
209  }
210
211  // Mapping from size class to number of pages to allocate at a time
212  inline size_t class_to_pages(size_t cl) {
213    return class_to_pages_[cl];
214  }
215
216  // Number of objects to move between a per-thread list and a central
217  // list in one shot.  We want this to be not too small so we can
218  // amortize the lock overhead for accessing the central list.  Making
219  // it too big may temporarily cause unnecessary memory wastage in the
220  // per-thread free list until the scavenger cleans up the list.
221  inline int num_objects_to_move(size_t cl) {
222    return num_objects_to_move_[cl];
223  }
224};
225
// Allocates "bytes" worth of memory and returns it.  Increments
// metadata_system_bytes appropriately.  May return NULL if allocation
// fails, so callers must check the result.  Requires pageheap_lock is held.
void* MetaDataAlloc(size_t bytes);

// Returns the total number of bytes allocated from the system.
// Requires pageheap_lock is held.
// NOTE(review): going by the name and the MetaDataAlloc comment, this is
// presumably the running total of bytes obtained through MetaDataAlloc --
// confirm against the definition.
uint64_t metadata_system_bytes();
234
// Capacity of StackTrace::stack, in entries.
static const int kMaxStackDepth = 31;

// A captured stack trace plus the size of the object it belongs to.
// "size" and "depth" are deliberately pointer-sized integers so that some
// generic code below can conveniently cast them back and forth to void*.
struct StackTrace {
  // Size of object.
  uintptr_t size;
  // Number of PC values stored in "stack".
  uintptr_t depth;
  // The PC values; room for kMaxStackDepth of them.
  void* stack[kMaxStackDepth];
};
243
244}  // namespace tcmalloc
245
246#endif  // TCMALLOC_COMMON_H_
247