1// Copyright (c) 2005, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31// Author: Sanjay Ghemawat
32
33#include <config.h>
34#include <errno.h>                      // for EAGAIN, errno
35#include <fcntl.h>                      // for open, O_RDWR
36#include <stddef.h>                     // for size_t, NULL, ptrdiff_t
37#if defined HAVE_STDINT_H
38#include <stdint.h>                     // for uintptr_t, intptr_t
39#elif defined HAVE_INTTYPES_H
40#include <inttypes.h>
41#else
42#include <sys/types.h>
43#endif
44#ifdef HAVE_MMAP
45#include <sys/mman.h>                   // for munmap, mmap, MADV_DONTNEED, etc
46#endif
47#ifdef HAVE_UNISTD_H
48#include <unistd.h>                     // for sbrk, getpagesize, off_t
49#endif
50#include <new>                          // for operator new
51#include <gperftools/malloc_extension.h>
52#include "base/basictypes.h"
53#include "base/commandlineflags.h"
54#include "base/spinlock.h"              // for SpinLockHolder, SpinLock, etc
55#include "common.h"
56#include "internal_logging.h"
57
// Some systems (FreeBSD, for example) only provide the older MAP_ANON
// spelling.  When MAP_ANONYMOUS is missing, alias it to MAP_ANON so the
// mmap() calls below can always use the newer name.
#if !defined(MAP_ANONYMOUS)
# define MAP_ANONYMOUS MAP_ANON
#endif
63
// MADV_FREE is specifically designed for use by malloc().  FreeBSD has
// long supported it.  Linux headers also define it (kernel 4.5 and
// later), but the selection below happens at compile time: a binary
// built against new headers would hand MADV_FREE to an older kernel
// that rejects it, and TCMalloc_SystemRelease() would then never
// release anything.  On Linux we therefore ignore the header's
// MADV_FREE unless the builder opts in with TCMALLOC_USE_MADV_FREE, and
// fall back to the somewhat inferior MADV_DONTNEED.
#if defined(__linux__) && defined(MADV_FREE) && \
    !defined(TCMALLOC_USE_MADV_FREE)
# undef MADV_FREE
#endif
#if !defined(MADV_FREE) && defined(MADV_DONTNEED)
# define MADV_FREE  MADV_DONTNEED
#endif

// Solaris has a bug where it doesn't declare madvise() for C++.
//    http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0
// Declare it ourselves so the call in TCMalloc_SystemRelease() compiles.
#if defined(__sun) && defined(__SVR4)
# include <sys/types.h>    // for caddr_t
  extern "C" { extern int madvise(caddr_t, size_t, int); }
#endif
77
// Expose the build mode as a constant so that the code below can branch
// on it with ordinary C++ conditionals instead of preprocessor
// conditionals.  kDebugMode is true unless NDEBUG is defined.
#ifndef NDEBUG
static const bool kDebugMode = true;
#else
static const bool kDebugMode = false;
#endif
85
86// TODO(sanjay): Move the code below into the tcmalloc namespace
87using tcmalloc::kLog;
88using tcmalloc::Log;
89
// Kept in an anonymous namespace so that the name "CheckAddressBits"
// cannot clash with anything outside this file.
namespace {

// Returns true when ptr has no bit set at position ADDRESS_BITS or
// above, i.e. when ptr fits in ADDRESS_BITS bits.
template <int ADDRESS_BITS>
bool CheckAddressBits(uintptr_t ptr) {
  const uintptr_t bits_above_limit = ptr >> ADDRESS_BITS;
  return bits_above_limit == 0;
}

// When ADDRESS_BITS equals the full width of a pointer every value
// fits, and shifting by the full width would be undefined, so this
// specialization answers without shifting.
template <>
bool CheckAddressBits<8 * sizeof(void*)>(uintptr_t /* ptr */) {
  return true;
}

}  // Anonymous namespace to avoid name conflicts on "CheckAddressBits".
104
// Build-time sanity check: kAddressBits must not exceed the number of
// bits in a pointer.  This also keeps the instantiation
// CheckAddressBits<kAddressBits> within the range covered by the
// generic template and its full-pointer-width specialization.
// (COMPILE_ASSERT and kAddressBits are declared outside this file.)
COMPILE_ASSERT(kAddressBits <= 8 * sizeof(void*),
               address_bits_larger_than_pointer_size);
107
// Structure for discovering alignment.
// Never instantiated in this file; only sizeof(MemoryAligner) is used,
// as the minimum alignment enforced by TCMalloc_SystemAlloc().  The
// union overlays a pointer, a double and a size_t, and is additionally
// tagged with CACHELINE_ALIGNED (a macro defined outside this file --
// presumably a cache-line alignment attribute; confirm in its header).
union MemoryAligner {
  void*  p;
  double d;
  size_t s;
} CACHELINE_ALIGNED;
114
// Held by TCMalloc_SystemAlloc() for the whole of lazy initialization
// and the allocation itself.  Note that TCMalloc_SystemRelease() does
// not take it.
static SpinLock spinlock(SpinLock::LINKER_INITIALIZED);

#if defined(HAVE_MMAP) || defined(MADV_FREE)
// Page size is initialized on demand (only needed for mmap-based allocators)
// A value of 0 means "not queried yet"; users call getpagesize() the
// first time they see 0.
static size_t pagesize = 0;
#endif

// The current system allocator
// NULL until InitSystemAllocators() installs the DefaultSysAllocator.
// Not static, so it is visible to other translation units.
SysAllocator* sys_alloc = NULL;
124
// Configuration parameters.
// Each flag takes its default from the named TCMALLOC_* environment
// variable through EnvToInt/EnvToBool (helpers declared outside this
// file).  The allocators below read the resulting FLAGS_* variables on
// every call, so they may observe the pre-initialization value for a
// while during startup.

// Read by DevMemSysAllocator::Alloc() and TCMalloc_SystemRelease().
DEFINE_int32(malloc_devmem_start,
             EnvToInt("TCMALLOC_DEVMEM_START", 0),
             "Physical memory starting location in MB for /dev/mem allocation."
             "  Setting this to 0 disables /dev/mem allocation");
// Read by DevMemSysAllocator::Alloc().
DEFINE_int32(malloc_devmem_limit,
             EnvToInt("TCMALLOC_DEVMEM_LIMIT", 0),
             "Physical memory limit location in MB for /dev/mem allocation."
             "  Setting this to 0 means no limit.");
// Read by SbrkSysAllocator::Alloc(); when true that allocator returns NULL.
DEFINE_bool(malloc_skip_sbrk,
            EnvToBool("TCMALLOC_SKIP_SBRK", false),
            "Whether sbrk can be used to obtain memory.");
// Read by MmapSysAllocator::Alloc(); when true that allocator returns NULL.
DEFINE_bool(malloc_skip_mmap,
            EnvToBool("TCMALLOC_SKIP_MMAP", false),
            "Whether mmap can be used to obtain memory.");
140
141// static allocators
142class SbrkSysAllocator : public SysAllocator {
143public:
144  SbrkSysAllocator() : SysAllocator() {
145  }
146  void* Alloc(size_t size, size_t *actual_size, size_t alignment);
147};
148static char sbrk_space[sizeof(SbrkSysAllocator)];
149
150class MmapSysAllocator : public SysAllocator {
151public:
152  MmapSysAllocator() : SysAllocator() {
153  }
154  void* Alloc(size_t size, size_t *actual_size, size_t alignment);
155};
156static char mmap_space[sizeof(MmapSysAllocator)];
157
158class DevMemSysAllocator : public SysAllocator {
159public:
160  DevMemSysAllocator() : SysAllocator() {
161  }
162  void* Alloc(size_t size, size_t *actual_size, size_t alignment);
163};
164
165class DefaultSysAllocator : public SysAllocator {
166 public:
167  DefaultSysAllocator() : SysAllocator() {
168    for (int i = 0; i < kMaxAllocators; i++) {
169      failed_[i] = true;
170      allocs_[i] = NULL;
171      names_[i] = NULL;
172    }
173  }
174  void SetChildAllocator(SysAllocator* alloc, unsigned int index,
175                         const char* name) {
176    if (index < kMaxAllocators && alloc != NULL) {
177      allocs_[index] = alloc;
178      failed_[index] = false;
179      names_[index] = name;
180    }
181  }
182  void* Alloc(size_t size, size_t *actual_size, size_t alignment);
183
184 private:
185  static const int kMaxAllocators = 2;
186  bool failed_[kMaxAllocators];
187  SysAllocator* allocs_[kMaxAllocators];
188  const char* names_[kMaxAllocators];
189};
190static char default_space[sizeof(DefaultSysAllocator)];
191static const char sbrk_name[] = "SbrkSysAllocator";
192static const char mmap_name[] = "MmapSysAllocator";
193
194
195void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
196                              size_t alignment) {
197#ifndef HAVE_SBRK
198  failed_ = true;
199  return NULL;
200#else
201  // Check if we should use sbrk allocation.
202  // FLAGS_malloc_skip_sbrk starts out as false (its uninitialized
203  // state) and eventually gets initialized to the specified value.  Note
204  // that this code runs for a while before the flags are initialized.
205  // That means that even if this flag is set to true, some (initial)
206  // memory will be allocated with sbrk before the flag takes effect.
207  if (FLAGS_malloc_skip_sbrk) {
208    return NULL;
209  }
210
211  // sbrk will release memory if passed a negative number, so we do
212  // a strict check here
213  if (static_cast<ptrdiff_t>(size + alignment) < 0) return NULL;
214
215  // This doesn't overflow because TCMalloc_SystemAlloc has already
216  // tested for overflow at the alignment boundary.
217  size = ((size + alignment - 1) / alignment) * alignment;
218
219  // "actual_size" indicates that the bytes from the returned pointer
220  // p up to and including (p + actual_size - 1) have been allocated.
221  if (actual_size) {
222    *actual_size = size;
223  }
224
225  // Check that we we're not asking for so much more memory that we'd
226  // wrap around the end of the virtual address space.  (This seems
227  // like something sbrk() should check for us, and indeed opensolaris
228  // does, but glibc does not:
229  //    http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/lib/libc/port/sys/sbrk.c?a=true
230  //    http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/libc/misc/sbrk.c?rev=1.1.2.1&content-type=text/plain&cvsroot=glibc
231  // Without this check, sbrk may succeed when it ought to fail.)
232  if (reinterpret_cast<intptr_t>(sbrk(0)) + size < size) {
233    return NULL;
234  }
235
236  void* result = sbrk(size);
237  if (result == reinterpret_cast<void*>(-1)) {
238    return NULL;
239  }
240
241  // Is it aligned?
242  uintptr_t ptr = reinterpret_cast<uintptr_t>(result);
243  if ((ptr & (alignment-1)) == 0)  return result;
244
245  // Try to get more memory for alignment
246  size_t extra = alignment - (ptr & (alignment-1));
247  void* r2 = sbrk(extra);
248  if (reinterpret_cast<uintptr_t>(r2) == (ptr + size)) {
249    // Contiguous with previous result
250    return reinterpret_cast<void*>(ptr + extra);
251  }
252
253  // Give up and ask for "size + alignment - 1" bytes so
254  // that we can find an aligned region within it.
255  result = sbrk(size + alignment - 1);
256  if (result == reinterpret_cast<void*>(-1)) {
257    return NULL;
258  }
259  ptr = reinterpret_cast<uintptr_t>(result);
260  if ((ptr & (alignment-1)) != 0) {
261    ptr += alignment - (ptr & (alignment-1));
262  }
263  return reinterpret_cast<void*>(ptr);
264#endif  // HAVE_SBRK
265}
266
267void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size,
268                              size_t alignment) {
269#ifndef HAVE_MMAP
270  failed_ = true;
271  return NULL;
272#else
273  // Check if we should use mmap allocation.
274  // FLAGS_malloc_skip_mmap starts out as false (its uninitialized
275  // state) and eventually gets initialized to the specified value.  Note
276  // that this code runs for a while before the flags are initialized.
277  // Chances are we never get here before the flags are initialized since
278  // sbrk is used until the heap is exhausted (before mmap is used).
279  if (FLAGS_malloc_skip_mmap) {
280    return NULL;
281  }
282
283  // Enforce page alignment
284  if (pagesize == 0) pagesize = getpagesize();
285  if (alignment < pagesize) alignment = pagesize;
286  size_t aligned_size = ((size + alignment - 1) / alignment) * alignment;
287  if (aligned_size < size) {
288    return NULL;
289  }
290  size = aligned_size;
291
292  // "actual_size" indicates that the bytes from the returned pointer
293  // p up to and including (p + actual_size - 1) have been allocated.
294  if (actual_size) {
295    *actual_size = size;
296  }
297
298  // Ask for extra memory if alignment > pagesize
299  size_t extra = 0;
300  if (alignment > pagesize) {
301    extra = alignment - pagesize;
302  }
303
304  // Note: size + extra does not overflow since:
305  //            size + alignment < (1<<NBITS).
306  // and        extra <= alignment
307  // therefore  size + extra < (1<<NBITS)
308  void* result = mmap(NULL, size + extra,
309                      PROT_READ|PROT_WRITE,
310                      MAP_PRIVATE|MAP_ANONYMOUS,
311                      -1, 0);
312  if (result == reinterpret_cast<void*>(MAP_FAILED)) {
313    return NULL;
314  }
315
316  // Adjust the return memory so it is aligned
317  uintptr_t ptr = reinterpret_cast<uintptr_t>(result);
318  size_t adjust = 0;
319  if ((ptr & (alignment - 1)) != 0) {
320    adjust = alignment - (ptr & (alignment - 1));
321  }
322
323  // Return the unused memory to the system
324  if (adjust > 0) {
325    munmap(reinterpret_cast<void*>(ptr), adjust);
326  }
327  if (adjust < extra) {
328    munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust);
329  }
330
331  ptr += adjust;
332  return reinterpret_cast<void*>(ptr);
333#endif  // HAVE_MMAP
334}
335
336void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size,
337                                size_t alignment) {
338#ifndef HAVE_MMAP
339  failed_ = true;
340  return NULL;
341#else
342  static bool initialized = false;
343  static off_t physmem_base;  // next physical memory address to allocate
344  static off_t physmem_limit; // maximum physical address allowed
345  static int physmem_fd;      // file descriptor for /dev/mem
346
347  // Check if we should use /dev/mem allocation.  Note that it may take
348  // a while to get this flag initialized, so meanwhile we fall back to
349  // the next allocator.  (It looks like 7MB gets allocated before
350  // this flag gets initialized -khr.)
351  if (FLAGS_malloc_devmem_start == 0) {
352    // NOTE: not a devmem_failure - we'd like TCMalloc_SystemAlloc to
353    // try us again next time.
354    return NULL;
355  }
356
357  if (!initialized) {
358    physmem_fd = open("/dev/mem", O_RDWR);
359    if (physmem_fd < 0) {
360      return NULL;
361    }
362    physmem_base = FLAGS_malloc_devmem_start*1024LL*1024LL;
363    physmem_limit = FLAGS_malloc_devmem_limit*1024LL*1024LL;
364    initialized = true;
365  }
366
367  // Enforce page alignment
368  if (pagesize == 0) pagesize = getpagesize();
369  if (alignment < pagesize) alignment = pagesize;
370  size_t aligned_size = ((size + alignment - 1) / alignment) * alignment;
371  if (aligned_size < size) {
372    return NULL;
373  }
374  size = aligned_size;
375
376  // "actual_size" indicates that the bytes from the returned pointer
377  // p up to and including (p + actual_size - 1) have been allocated.
378  if (actual_size) {
379    *actual_size = size;
380  }
381
382  // Ask for extra memory if alignment > pagesize
383  size_t extra = 0;
384  if (alignment > pagesize) {
385    extra = alignment - pagesize;
386  }
387
388  // check to see if we have any memory left
389  if (physmem_limit != 0 &&
390      ((size + extra) > (physmem_limit - physmem_base))) {
391    return NULL;
392  }
393
394  // Note: size + extra does not overflow since:
395  //            size + alignment < (1<<NBITS).
396  // and        extra <= alignment
397  // therefore  size + extra < (1<<NBITS)
398  void *result = mmap(0, size + extra, PROT_WRITE|PROT_READ,
399                      MAP_SHARED, physmem_fd, physmem_base);
400  if (result == reinterpret_cast<void*>(MAP_FAILED)) {
401    return NULL;
402  }
403  uintptr_t ptr = reinterpret_cast<uintptr_t>(result);
404
405  // Adjust the return memory so it is aligned
406  size_t adjust = 0;
407  if ((ptr & (alignment - 1)) != 0) {
408    adjust = alignment - (ptr & (alignment - 1));
409  }
410
411  // Return the unused virtual memory to the system
412  if (adjust > 0) {
413    munmap(reinterpret_cast<void*>(ptr), adjust);
414  }
415  if (adjust < extra) {
416    munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust);
417  }
418
419  ptr += adjust;
420  physmem_base += adjust + size;
421
422  return reinterpret_cast<void*>(ptr);
423#endif  // HAVE_MMAP
424}
425
426void* DefaultSysAllocator::Alloc(size_t size, size_t *actual_size,
427                                 size_t alignment) {
428  for (int i = 0; i < kMaxAllocators; i++) {
429    if (!failed_[i] && allocs_[i] != NULL) {
430      void* result = allocs_[i]->Alloc(size, actual_size, alignment);
431      if (result != NULL) {
432        return result;
433      }
434      failed_[i] = true;
435    }
436  }
437  // After both failed, reset "failed_" to false so that a single failed
438  // allocation won't make the allocator never work again.
439  for (int i = 0; i < kMaxAllocators; i++) {
440    failed_[i] = false;
441  }
442  return NULL;
443}
444
445static bool system_alloc_inited = false;
446void InitSystemAllocators(void) {
447  MmapSysAllocator *mmap = new (mmap_space) MmapSysAllocator();
448  SbrkSysAllocator *sbrk = new (sbrk_space) SbrkSysAllocator();
449
450  // In 64-bit debug mode, place the mmap allocator first since it
451  // allocates pointers that do not fit in 32 bits and therefore gives
452  // us better testing of code's 64-bit correctness.  It also leads to
453  // less false negatives in heap-checking code.  (Numbers are less
454  // likely to look like pointers and therefore the conservative gc in
455  // the heap-checker is less likely to misinterpret a number as a
456  // pointer).
457  DefaultSysAllocator *sdef = new (default_space) DefaultSysAllocator();
458  if (kDebugMode && sizeof(void*) > 4) {
459    sdef->SetChildAllocator(mmap, 0, mmap_name);
460    sdef->SetChildAllocator(sbrk, 1, sbrk_name);
461  } else {
462    sdef->SetChildAllocator(sbrk, 0, sbrk_name);
463    sdef->SetChildAllocator(mmap, 1, mmap_name);
464  }
465  sys_alloc = sdef;
466}
467
468void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size,
469                           size_t alignment) {
470  // Discard requests that overflow
471  if (size + alignment < size) return NULL;
472
473  SpinLockHolder lock_holder(&spinlock);
474
475  if (!system_alloc_inited) {
476    InitSystemAllocators();
477    system_alloc_inited = true;
478  }
479
480  // Enforce minimum alignment
481  if (alignment < sizeof(MemoryAligner)) alignment = sizeof(MemoryAligner);
482
483  void* result = sys_alloc->Alloc(size, actual_size, alignment);
484  if (result != NULL) {
485    if (actual_size) {
486      CheckAddressBits<kAddressBits>(
487          reinterpret_cast<uintptr_t>(result) + *actual_size - 1);
488    } else {
489      CheckAddressBits<kAddressBits>(
490          reinterpret_cast<uintptr_t>(result) + size - 1);
491    }
492  }
493  return result;
494}
495
496void TCMalloc_SystemRelease(void* start, size_t length) {
497#ifdef MADV_FREE
498  if (FLAGS_malloc_devmem_start) {
499    // It's not safe to use MADV_FREE/MADV_DONTNEED if we've been
500    // mapping /dev/mem for heap memory.
501    return;
502  }
503  if (pagesize == 0) pagesize = getpagesize();
504  const size_t pagemask = pagesize - 1;
505
506  size_t new_start = reinterpret_cast<size_t>(start);
507  size_t end = new_start + length;
508  size_t new_end = end;
509
510  // Round up the starting address and round down the ending address
511  // to be page aligned:
512  new_start = (new_start + pagesize - 1) & ~pagemask;
513  new_end = new_end & ~pagemask;
514
515  ASSERT((new_start & pagemask) == 0);
516  ASSERT((new_end & pagemask) == 0);
517  ASSERT(new_start >= reinterpret_cast<size_t>(start));
518  ASSERT(new_end <= end);
519
520  if (new_end > new_start) {
521    // Note -- ignoring most return codes, because if this fails it
522    // doesn't matter...
523    while (madvise(reinterpret_cast<char*>(new_start), new_end - new_start,
524                   MADV_FREE) == -1 &&
525           errno == EAGAIN) {
526      // NOP
527    }
528  }
529#endif
530}
531