// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>
//
// A data structure used by the caching malloc.  It maps from page# to
// a pointer to the info about that page.  We use two
// representations: one for 32-bit addresses, and another for 64-bit
// addresses.  Both representations provide the same interface.  The
// first representation is implemented as a flat array, the second as
// a three-level radix tree that strips away approximately 1/3rd of
// the bits every time.
//
// The BITS parameter should be the number of bits required to hold
// a page number.  E.g., with 32-bit pointers and 4K pages (i.e.,
// page offset fits in lower 12 bits), BITS == 20.
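//
// Similarly (an illustrative calculation): on a 64-bit system with 48-bit
// virtual addresses and 4K pages, a page number needs 48 - 12 = 36 bits,
// so BITS == 36, a size served by the three-level radix tree below.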

#ifndef TCMALLOC_PAGEMAP_H_
#define TCMALLOC_PAGEMAP_H_

#include "config.h"

#include <stddef.h>                     // for NULL, size_t
#include <string.h>                     // for memset
#if defined HAVE_STDINT_H
#include <stdint.h>
#elif defined HAVE_INTTYPES_H
#include <inttypes.h>
#else
#include <sys/types.h>
#endif
#ifdef WIN32
// TODO(jar): This is not needed when TCMalloc_PageMap1_LazyCommit has an API
// supporting commit and reservation of memory.
#include "common.h"
#endif

#include "internal_logging.h"  // for ASSERT

// Single-level array
template <int BITS>
class TCMalloc_PageMap1 {
 private:
  static const int LENGTH = 1 << BITS;

  void** array_;

 public:
  typedef uintptr_t Number;

  explicit TCMalloc_PageMap1(void* (*allocator)(size_t)) {
    array_ = reinterpret_cast<void**>((*allocator)(sizeof(void*) << BITS));
    memset(array_, 0, sizeof(void*) << BITS);
  }

  // Ensure that the map contains initialized entries "x .. x+n-1".
  // Returns true if successful, false if we could not allocate memory.
  bool Ensure(Number x, size_t n) {
    // Nothing to do since flat array was allocated at start.  All
    // that's left is to check for overflow (that is, we don't want to
    // ensure a number y where array_[y] would be an out-of-bounds
    // access).
    return n <= LENGTH - x;   // an overflow-free way to do "x + n <= LENGTH"
  }

  void PreallocateMoreMemory() {}

  // Return the current value for KEY.  Returns NULL if not yet set,
  // or if k is out of range.
  void* get(Number k) const {
    if ((k >> BITS) > 0) {
      return NULL;
    }
    return array_[k];
  }

  // REQUIRES "k" is in range "[0,2^BITS-1]".
  // REQUIRES "k" has been ensured before.
  //
  // Sets the value 'v' for key 'k'.
  void set(Number k, void* v) {
    array_[k] = v;
  }

  // Return the first non-NULL pointer found in this map for
  // a page number >= k.  Returns NULL if no such number is found.
  void* Next(Number k) const {
    // Shift in Number width so the bound cannot overflow a plain int.
    while (k < (Number(1) << BITS)) {
      if (array_[k] != NULL) return array_[k];
      k++;
    }
    return NULL;
  }
};
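
// Illustrative usage sketch for TCMalloc_PageMap1 (explanatory only, not
// part of the library; MetaAlloc is a hypothetical zero-filling allocator):
//
//   static void* MetaAlloc(size_t bytes) { return calloc(1, bytes); }
//   ...
//   TCMalloc_PageMap1<20> pagemap(&MetaAlloc);  // 32-bit pointers, 4K pages
//   if (pagemap.Ensure(0x123, 1)) {
//     pagemap.set(0x123, span);                 // 'span' is caller metadata
//     void* info = pagemap.get(0x123);          // == span
//   }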

#ifdef WIN32
// Lazy commit, single-level array.
// Very similar to PageMap1, except the page map is only committed as needed.
// Since we don't return memory to the OS, the committed portion of the map will
// only grow, and we'll only be called to Ensure when we really grow the heap.
// We maintain a bit map to help us deduce if we've already committed a range
// in our map.
template <int BITS>
class TCMalloc_PageMap1_LazyCommit {
 private:
  // Dimension of our page map array_.
  static const int LENGTH = 1 << BITS;

  // The page map array that sits in reserved virtual space.  Pages of this
  // array are committed as they are needed.  For each page of virtual memory,
  // we potentially have a pointer to a span instance.
  void** array_;

  // A bit vector that allows us to deduce what pages in array_ are committed.
  // Note that 2^3 = 8 bits per char, and hence the use of the magical "3" in
  // the array range gives us the effective "divide by 8".
  char committed_[sizeof(void*) << (BITS - kPageShift - 3)];
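
  // For example (illustrative arithmetic): with BITS == 20, 4-byte pointers,
  // and kPageShift == 12, array_ occupies 4 << 20 bytes == 4MB, i.e. 1024
  // pages of 4K, while committed_ is 4 << (20 - 12 - 3) == 128 bytes == 1024
  // bits -- one bit per page of array_.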

  // Given an |index| into |array_|, find the page number in |array_| that
  // holds that element.
  size_t ContainingPage(size_t index) const {
    return (index * sizeof(*array_)) >> kPageShift;
  }

  // Find out if the given page_num index in array_ is in committed memory.
  bool IsCommitted(size_t page_num) const {
    return committed_[page_num >> 3] & (1 << (page_num & 0x7));
  }

  // Remember that the given page_num index in array_ is in committed memory.
  void SetCommitted(size_t page_num) {
    committed_[page_num >> 3] |= (1 << (page_num & 0x7));
  }

 public:
  typedef uintptr_t Number;

  explicit TCMalloc_PageMap1_LazyCommit(void* (*allocator)(size_t)) {
    // TODO(jar): We need a reservation function, but current API to this class
    // only provides an allocator.
    // Get decommitted memory.  We will commit as necessary.
    size_t size = sizeof(*array_) << BITS;
    array_ = reinterpret_cast<void**>(VirtualAlloc(
        NULL, size, MEM_RESERVE, PAGE_READWRITE));
    tcmalloc::update_metadata_system_bytes(size);
    tcmalloc::update_metadata_unmapped_bytes(size);

    // Make sure we divided LENGTH evenly.
    ASSERT(sizeof(committed_) * 8 == (LENGTH * sizeof(*array_)) >> kPageShift);
    // Indicate that none of the pages of array_ have been committed yet.
    memset(committed_, 0, sizeof(committed_));
  }

  // Ensure that the map contains initialized and committed entries in array_
  // to describe pages "x .. x+n-1".
  // Returns true if successful, false if we could not ensure this.
  // If we have to commit more memory in array_ (which also clears said
  // memory), then we'll set some of the bits in committed_ to remember this
  // fact.
  // Only the bits of committed_ near end-points for calls to Ensure() are ever
  // set, as the calls to Ensure() will never have overlapping ranges other
  // than at their end-points.
  //
  // Example: Suppose the OS allocates memory in pages including 40...50, and
  // later the OS allocates memory in pages 51...83.  When the first allocation
  // of 40...50 is observed, then Ensure of (39,51) will be called.  The range
  // shown in the arguments is extended so that tcmalloc can look to see if
  // adjacent pages are part of a span that can be coalesced.  Later, when
  // pages 51...83 are allocated, Ensure() will be called with arguments
  // (50,84), broadened again for the same reason.
  //
  // After the above, we would NEVER get a call such as Ensure(45,60), as that
  // overlaps with the interior of prior ensured regions.  We ONLY get an
  // Ensure call when the OS has allocated memory, and since we NEVER give
  // memory back to the OS, the OS can't possibly allocate the same region to
  // us twice, and can't induce an Ensure() on an interior of a previous
  // Ensure call.
  //
  // Also note that OS allocations are NOT guaranteed to be consecutive (there
  // may be "holes" where code etc. uses the virtual addresses), or to appear
  // in any order, such as lowest to highest, or vice versa (as other
  // independent allocation systems in the process may be performing
  // VirtualAllocs and VirtualFrees asynchronously).
  bool Ensure(Number x, size_t n) {
    if (n > LENGTH - x)
      return false;  // We won't Ensure mapping for last pages in memory.
    ASSERT(n > 0);

    // For a given page number in memory, calculate what page in array_ needs
    // to be memory resident.  Note that we really only need a few bytes in
    // array_ for each page of virtual space we have to map, but we can only
    // commit whole pages of array_.  For instance, a 4K page of array_ has
    // about 1K entries, and hence can map about 1K pages, or a total of about
    // 4MB typically.  As a result, it is possible that the first entry in
    // array_, and the n'th entry in array_, will sit in the same page of
    // array_.
    size_t first_page = ContainingPage(x);
    size_t last_page = ContainingPage(x + n - 1);

    // Check at each boundary, to see if we need to commit at that end.  Some
    // other neighbor may have already forced us to commit at either or both
    // boundaries.
    if (IsCommitted(first_page)) {
      if (first_page == last_page) return true;
      ++first_page;
      if (IsCommitted(first_page)) {
        if (first_page == last_page) return true;
        ++first_page;
      }
    }

    if (IsCommitted(last_page)) {
      if (first_page == last_page) return true;
      --last_page;
      if (IsCommitted(last_page)) {
        if (first_page == last_page) return true;
        --last_page;
      }
    }

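    // Worked example (illustrative): suppose a prior call committed pages
    // 3..5 of array_, and this call computes first_page == 5 and
    // last_page == 9.  Page 5 is already committed, so first_page advances
    // to 6; page 9 is not committed, so last_page stays.  Pages 6..9 are
    // then exactly the range that still needs committing below.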
    ASSERT(!IsCommitted(last_page));
    ASSERT(!IsCommitted(first_page));

    void* start = reinterpret_cast<char*>(array_) + (first_page << kPageShift);
    size_t length = (last_page - first_page + 1) << kPageShift;

#ifndef NDEBUG
    // Validate we are committing new sections, and hence we're not clearing
    // any existing data.
    MEMORY_BASIC_INFORMATION info = {0};
    size_t result = VirtualQuery(start, &info, sizeof(info));
    ASSERT(result);
    ASSERT(0 == (info.State & MEM_COMMIT));  // It starts with uncommitted.
    ASSERT(info.RegionSize >= length);       // Entire length is uncommitted.
#endif

    TCMalloc_SystemCommit(start, length);
    tcmalloc::update_metadata_unmapped_bytes(-length);

#ifndef NDEBUG
    result = VirtualQuery(start, &info, sizeof(info));
    ASSERT(result);
    ASSERT(0 != (info.State & MEM_COMMIT));  // Now it is committed.
    ASSERT(info.RegionSize >= length);       // Entire length is committed.
#endif

    // As noted in the large comment/example describing this method, we will
    // never be called with a range of pages very much inside this |first_page|
    // to |last_page| range.
    // As a result, we only need to set bits for each end of that range, and
    // one page inside each end.
    SetCommitted(first_page);
    if (first_page < last_page) {
      SetCommitted(last_page);
      SetCommitted(first_page + 1);  // These may be duplicates now.
      SetCommitted(last_page - 1);
    }

    return true;
  }

  // This is a premature call to get all the meta-memory allocated, so as to
  // avoid virtual space fragmentation.  Since we pre-reserved all memory, we
  // don't need to do anything here (we won't fragment virtual space).
  void PreallocateMoreMemory() {}

  // Return the current value for KEY.  Returns NULL if not yet set,
  // or if k is out of range.
  void* get(Number k) const {
    if ((k >> BITS) > 0) {
      return NULL;
    }
    return array_[k];
  }

  // REQUIRES "k" is in range "[0,2^BITS-1]".
  // REQUIRES "k" has been ensured before.
  //
  // Sets the value for KEY.
  void set(Number k, void* v) {
    array_[k] = v;
  }

  // Return the first non-NULL pointer found in this map for
  // a page number >= k.  Returns NULL if no such number is found.
  void* Next(Number k) const {
    while (k < (Number(1) << BITS)) {
      if (array_[k] != NULL) return array_[k];
      k++;
    }
    return NULL;
  }
};
#endif  // WIN32


// Two-level radix tree
template <int BITS>
class TCMalloc_PageMap2 {
 private:
  // Put 32 entries in the root and (2^BITS)/32 entries in each leaf.
  static const int ROOT_BITS = 5;
  static const int ROOT_LENGTH = 1 << ROOT_BITS;

  static const int LEAF_BITS = BITS - ROOT_BITS;
  static const int LEAF_LENGTH = 1 << LEAF_BITS;

  // Leaf node
  struct Leaf {
    void* values[LEAF_LENGTH];
  };

  Leaf* root_[ROOT_LENGTH];             // Pointers to 32 child nodes
  void* (*allocator_)(size_t);          // Memory allocator

 public:
  typedef uintptr_t Number;

  explicit TCMalloc_PageMap2(void* (*allocator)(size_t)) {
    allocator_ = allocator;
    memset(root_, 0, sizeof(root_));
  }

  void* get(Number k) const {
    const Number i1 = k >> LEAF_BITS;
    const Number i2 = k & (LEAF_LENGTH-1);
    if ((k >> BITS) > 0 || root_[i1] == NULL) {
      return NULL;
    }
    return root_[i1]->values[i2];
  }

  void set(Number k, void* v) {
    ASSERT(k >> BITS == 0);
    const Number i1 = k >> LEAF_BITS;
    const Number i2 = k & (LEAF_LENGTH-1);
    root_[i1]->values[i2] = v;
  }

  bool Ensure(Number start, size_t n) {
    for (Number key = start; key <= start + n - 1; ) {
      const Number i1 = key >> LEAF_BITS;

      // Check for overflow
      if (i1 >= ROOT_LENGTH)
        return false;

      // Make 2nd level node if necessary
      if (root_[i1] == NULL) {
        Leaf* leaf = reinterpret_cast<Leaf*>((*allocator_)(sizeof(Leaf)));
        if (leaf == NULL) return false;
        memset(leaf, 0, sizeof(*leaf));
        root_[i1] = leaf;
      }

      // Advance key past whatever is covered by this leaf node
      key = ((key >> LEAF_BITS) + 1) << LEAF_BITS;
    }
    return true;
  }

  void PreallocateMoreMemory() {
    // Allocate enough to keep track of all possible pages
    Ensure(0, Number(1) << BITS);
  }

  void* Next(Number k) const {
    while (k < (Number(1) << BITS)) {
      const Number i1 = k >> LEAF_BITS;
      Leaf* leaf = root_[i1];
      if (leaf != NULL) {
        // Scan forward in leaf
        for (Number i2 = k & (LEAF_LENGTH - 1); i2 < LEAF_LENGTH; i2++) {
          if (leaf->values[i2] != NULL) {
            return leaf->values[i2];
          }
        }
      }
      // Skip to next top-level entry
      k = (i1 + 1) << LEAF_BITS;
    }
    return NULL;
  }
};
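
// Illustrative index decomposition for TCMalloc_PageMap2 (an explanatory
// sketch, not part of the library): with BITS == 20, ROOT_BITS == 5 and
// LEAF_BITS == 15, a page number k splits as
//
//   i1 = k >> 15;              // one of 32 root slots
//   i2 = k & ((1 << 15) - 1);  // one of 32768 values in that leaf
//
// E.g. k == 0x4A0B1 gives i1 == 0x9 and i2 == 0x20B1.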

// Three-level radix tree
template <int BITS>
class TCMalloc_PageMap3 {
 private:
  // How many bits should we consume at each interior level
  static const int INTERIOR_BITS = (BITS + 2) / 3; // Round-up
  static const int INTERIOR_LENGTH = 1 << INTERIOR_BITS;

  // How many bits should we consume at leaf level
  static const int LEAF_BITS = BITS - 2*INTERIOR_BITS;
  static const int LEAF_LENGTH = 1 << LEAF_BITS;
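
  // For example (illustrative arithmetic): with 48-bit virtual addresses and
  // 4K pages, BITS == 36, so INTERIOR_BITS == (36 + 2) / 3 == 12 and
  // LEAF_BITS == 36 - 24 == 12 -- a 12/12/12 split of the page number.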

  // Interior node
  struct Node {
    Node* ptrs[INTERIOR_LENGTH];
  };

  // Leaf node
  struct Leaf {
    void* values[LEAF_LENGTH];
  };

  Node* root_;                          // Root of radix tree
  void* (*allocator_)(size_t);          // Memory allocator

  Node* NewNode() {
    Node* result = reinterpret_cast<Node*>((*allocator_)(sizeof(Node)));
    if (result != NULL) {
      memset(result, 0, sizeof(*result));
    }
    return result;
  }

 public:
  typedef uintptr_t Number;

  explicit TCMalloc_PageMap3(void* (*allocator)(size_t)) {
    allocator_ = allocator;
    root_ = NewNode();
  }

  void* get(Number k) const {
    const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS);
    const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1);
    const Number i3 = k & (LEAF_LENGTH-1);
    if ((k >> BITS) > 0 ||
        root_->ptrs[i1] == NULL || root_->ptrs[i1]->ptrs[i2] == NULL) {
      return NULL;
    }
    return reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2])->values[i3];
  }

  void set(Number k, void* v) {
    ASSERT(k >> BITS == 0);
    const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS);
    const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1);
    const Number i3 = k & (LEAF_LENGTH-1);
    reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2])->values[i3] = v;
  }

  bool Ensure(Number start, size_t n) {
    for (Number key = start; key <= start + n - 1; ) {
      const Number i1 = key >> (LEAF_BITS + INTERIOR_BITS);
      const Number i2 = (key >> LEAF_BITS) & (INTERIOR_LENGTH-1);

      // Check for overflow
      if (i1 >= INTERIOR_LENGTH || i2 >= INTERIOR_LENGTH)
        return false;
479      if (root_->ptrs[i1] == NULL) {
480        Node* n = NewNode();
481        if (n == NULL) return false;
482        root_->ptrs[i1] = n;
483      }

      // Make leaf node if necessary
      if (root_->ptrs[i1]->ptrs[i2] == NULL) {
        Leaf* leaf = reinterpret_cast<Leaf*>((*allocator_)(sizeof(Leaf)));
        if (leaf == NULL) return false;
        memset(leaf, 0, sizeof(*leaf));
        root_->ptrs[i1]->ptrs[i2] = reinterpret_cast<Node*>(leaf);
      }

      // Advance key past whatever is covered by this leaf node
      key = ((key >> LEAF_BITS) + 1) << LEAF_BITS;
    }
    return true;
  }

  void PreallocateMoreMemory() {
  }

  void* Next(Number k) const {
    while (k < (Number(1) << BITS)) {
      const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS);
      const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1);
      if (root_->ptrs[i1] == NULL) {
        // Advance to next top-level entry
        k = (i1 + 1) << (LEAF_BITS + INTERIOR_BITS);
      } else {
        Leaf* leaf = reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2]);
        if (leaf != NULL) {
          for (Number i3 = (k & (LEAF_LENGTH-1)); i3 < LEAF_LENGTH; i3++) {
            if (leaf->values[i3] != NULL) {
              return leaf->values[i3];
            }
          }
        }
        // Advance to next interior entry
        k = ((k >> LEAF_BITS) + 1) << LEAF_BITS;
      }
    }
    return NULL;
  }
};
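
// Illustrative usage sketch for the three-level map (explanatory only, not
// part of the library; MetaAlloc is a hypothetical zero-filling allocator
// and kPageShift is assumed to be 12):
//
//   static void* MetaAlloc(size_t bytes) { return calloc(1, bytes); }
//   ...
//   TCMalloc_PageMap3<36> pagemap(&MetaAlloc);  // 48-bit VA, 4K pages
//   const uintptr_t page = reinterpret_cast<uintptr_t>(ptr) >> 12;
//   if (pagemap.Ensure(page, 1)) {
//     pagemap.set(page, span);
//     ASSERT(pagemap.get(page) == span);
//   }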

#endif  // TCMALLOC_PAGEMAP_H_