// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 30// --- 31// Author: Sanjay Ghemawat 32 33#include <config.h> 34#include <errno.h> // for EAGAIN, errno 35#include <fcntl.h> // for open, O_RDWR 36#include <stddef.h> // for size_t, NULL, ptrdiff_t 37#if defined HAVE_STDINT_H 38#include <stdint.h> // for uintptr_t, intptr_t 39#elif defined HAVE_INTTYPES_H 40#include <inttypes.h> 41#else 42#include <sys/types.h> 43#endif 44#ifdef HAVE_MMAP 45#include <sys/mman.h> // for munmap, mmap, MADV_DONTNEED, etc 46#endif 47#ifdef HAVE_UNISTD_H 48#include <unistd.h> // for sbrk, getpagesize, off_t 49#endif 50#include <new> // for operator new 51#include <gperftools/malloc_extension.h> 52#include "base/basictypes.h" 53#include "base/commandlineflags.h" 54#include "base/spinlock.h" // for SpinLockHolder, SpinLock, etc 55#include "common.h" 56#include "internal_logging.h" 57 58// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old 59// form of the name instead. 60#ifndef MAP_ANONYMOUS 61# define MAP_ANONYMOUS MAP_ANON 62#endif 63 64// MADV_FREE is specifically designed for use by malloc(), but only 65// FreeBSD supports it; in linux we fall back to the somewhat inferior 66// MADV_DONTNEED. 67#if !defined(MADV_FREE) && defined(MADV_DONTNEED) 68# define MADV_FREE MADV_DONTNEED 69#endif 70 71// Solaris has a bug where it doesn't declare madvise() for C++. 72// http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0 73#if defined(__sun) && defined(__SVR4) 74# include <sys/types.h> // for caddr_t 75 extern "C" { extern int madvise(caddr_t, size_t, int); } 76#endif 77 78// Set kDebugMode mode so that we can have use C++ conditionals 79// instead of preprocessor conditionals. 80#ifdef NDEBUG 81static const bool kDebugMode = false; 82#else 83static const bool kDebugMode = true; 84#endif 85 86// TODO(sanjay): Move the code below into the tcmalloc namespace 87using tcmalloc::kLog; 88using tcmalloc::Log; 89 90// Anonymous namespace to avoid name conflicts on "CheckAddressBits". 
91namespace { 92 93// Check that no bit is set at position ADDRESS_BITS or higher. 94template <int ADDRESS_BITS> bool CheckAddressBits(uintptr_t ptr) { 95 return (ptr >> ADDRESS_BITS) == 0; 96} 97 98// Specialize for the bit width of a pointer to avoid undefined shift. 99template <> bool CheckAddressBits<8 * sizeof(void*)>(uintptr_t ptr) { 100 return true; 101} 102 103} // Anonymous namespace to avoid name conflicts on "CheckAddressBits". 104 105COMPILE_ASSERT(kAddressBits <= 8 * sizeof(void*), 106 address_bits_larger_than_pointer_size); 107 108// Structure for discovering alignment 109union MemoryAligner { 110 void* p; 111 double d; 112 size_t s; 113} CACHELINE_ALIGNED; 114 115static SpinLock spinlock(SpinLock::LINKER_INITIALIZED); 116 117#if defined(HAVE_MMAP) || defined(MADV_FREE) 118// Page size is initialized on demand (only needed for mmap-based allocators) 119static size_t pagesize = 0; 120#endif 121 122// The current system allocator 123SysAllocator* sys_alloc = NULL; 124 125// Configuration parameters. 126DEFINE_int32(malloc_devmem_start, 127 EnvToInt("TCMALLOC_DEVMEM_START", 0), 128 "Physical memory starting location in MB for /dev/mem allocation." 129 " Setting this to 0 disables /dev/mem allocation"); 130DEFINE_int32(malloc_devmem_limit, 131 EnvToInt("TCMALLOC_DEVMEM_LIMIT", 0), 132 "Physical memory limit location in MB for /dev/mem allocation." 
133 " Setting this to 0 means no limit."); 134DEFINE_bool(malloc_skip_sbrk, 135 EnvToBool("TCMALLOC_SKIP_SBRK", false), 136 "Whether sbrk can be used to obtain memory."); 137DEFINE_bool(malloc_skip_mmap, 138 EnvToBool("TCMALLOC_SKIP_MMAP", false), 139 "Whether mmap can be used to obtain memory."); 140 141// static allocators 142class SbrkSysAllocator : public SysAllocator { 143public: 144 SbrkSysAllocator() : SysAllocator() { 145 } 146 void* Alloc(size_t size, size_t *actual_size, size_t alignment); 147}; 148static char sbrk_space[sizeof(SbrkSysAllocator)]; 149 150class MmapSysAllocator : public SysAllocator { 151public: 152 MmapSysAllocator() : SysAllocator() { 153 } 154 void* Alloc(size_t size, size_t *actual_size, size_t alignment); 155}; 156static char mmap_space[sizeof(MmapSysAllocator)]; 157 158class DevMemSysAllocator : public SysAllocator { 159public: 160 DevMemSysAllocator() : SysAllocator() { 161 } 162 void* Alloc(size_t size, size_t *actual_size, size_t alignment); 163}; 164 165class DefaultSysAllocator : public SysAllocator { 166 public: 167 DefaultSysAllocator() : SysAllocator() { 168 for (int i = 0; i < kMaxAllocators; i++) { 169 failed_[i] = true; 170 allocs_[i] = NULL; 171 names_[i] = NULL; 172 } 173 } 174 void SetChildAllocator(SysAllocator* alloc, unsigned int index, 175 const char* name) { 176 if (index < kMaxAllocators && alloc != NULL) { 177 allocs_[index] = alloc; 178 failed_[index] = false; 179 names_[index] = name; 180 } 181 } 182 void* Alloc(size_t size, size_t *actual_size, size_t alignment); 183 184 private: 185 static const int kMaxAllocators = 2; 186 bool failed_[kMaxAllocators]; 187 SysAllocator* allocs_[kMaxAllocators]; 188 const char* names_[kMaxAllocators]; 189}; 190static char default_space[sizeof(DefaultSysAllocator)]; 191static const char sbrk_name[] = "SbrkSysAllocator"; 192static const char mmap_name[] = "MmapSysAllocator"; 193 194 195void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, 196 size_t alignment) { 
197#ifndef HAVE_SBRK 198 failed_ = true; 199 return NULL; 200#else 201 // Check if we should use sbrk allocation. 202 // FLAGS_malloc_skip_sbrk starts out as false (its uninitialized 203 // state) and eventually gets initialized to the specified value. Note 204 // that this code runs for a while before the flags are initialized. 205 // That means that even if this flag is set to true, some (initial) 206 // memory will be allocated with sbrk before the flag takes effect. 207 if (FLAGS_malloc_skip_sbrk) { 208 return NULL; 209 } 210 211 // sbrk will release memory if passed a negative number, so we do 212 // a strict check here 213 if (static_cast<ptrdiff_t>(size + alignment) < 0) return NULL; 214 215 // This doesn't overflow because TCMalloc_SystemAlloc has already 216 // tested for overflow at the alignment boundary. 217 size = ((size + alignment - 1) / alignment) * alignment; 218 219 // "actual_size" indicates that the bytes from the returned pointer 220 // p up to and including (p + actual_size - 1) have been allocated. 221 if (actual_size) { 222 *actual_size = size; 223 } 224 225 // Check that we we're not asking for so much more memory that we'd 226 // wrap around the end of the virtual address space. (This seems 227 // like something sbrk() should check for us, and indeed opensolaris 228 // does, but glibc does not: 229 // http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/lib/libc/port/sys/sbrk.c?a=true 230 // http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/libc/misc/sbrk.c?rev=1.1.2.1&content-type=text/plain&cvsroot=glibc 231 // Without this check, sbrk may succeed when it ought to fail.) 232 if (reinterpret_cast<intptr_t>(sbrk(0)) + size < size) { 233 return NULL; 234 } 235 236 void* result = sbrk(size); 237 if (result == reinterpret_cast<void*>(-1)) { 238 return NULL; 239 } 240 241 // Is it aligned? 
242 uintptr_t ptr = reinterpret_cast<uintptr_t>(result); 243 if ((ptr & (alignment-1)) == 0) return result; 244 245 // Try to get more memory for alignment 246 size_t extra = alignment - (ptr & (alignment-1)); 247 void* r2 = sbrk(extra); 248 if (reinterpret_cast<uintptr_t>(r2) == (ptr + size)) { 249 // Contiguous with previous result 250 return reinterpret_cast<void*>(ptr + extra); 251 } 252 253 // Give up and ask for "size + alignment - 1" bytes so 254 // that we can find an aligned region within it. 255 result = sbrk(size + alignment - 1); 256 if (result == reinterpret_cast<void*>(-1)) { 257 return NULL; 258 } 259 ptr = reinterpret_cast<uintptr_t>(result); 260 if ((ptr & (alignment-1)) != 0) { 261 ptr += alignment - (ptr & (alignment-1)); 262 } 263 return reinterpret_cast<void*>(ptr); 264#endif // HAVE_SBRK 265} 266 267void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, 268 size_t alignment) { 269#ifndef HAVE_MMAP 270 failed_ = true; 271 return NULL; 272#else 273 // Check if we should use mmap allocation. 274 // FLAGS_malloc_skip_mmap starts out as false (its uninitialized 275 // state) and eventually gets initialized to the specified value. Note 276 // that this code runs for a while before the flags are initialized. 277 // Chances are we never get here before the flags are initialized since 278 // sbrk is used until the heap is exhausted (before mmap is used). 279 if (FLAGS_malloc_skip_mmap) { 280 return NULL; 281 } 282 283 // Enforce page alignment 284 if (pagesize == 0) pagesize = getpagesize(); 285 if (alignment < pagesize) alignment = pagesize; 286 size_t aligned_size = ((size + alignment - 1) / alignment) * alignment; 287 if (aligned_size < size) { 288 return NULL; 289 } 290 size = aligned_size; 291 292 // "actual_size" indicates that the bytes from the returned pointer 293 // p up to and including (p + actual_size - 1) have been allocated. 
294 if (actual_size) { 295 *actual_size = size; 296 } 297 298 // Ask for extra memory if alignment > pagesize 299 size_t extra = 0; 300 if (alignment > pagesize) { 301 extra = alignment - pagesize; 302 } 303 304 // Note: size + extra does not overflow since: 305 // size + alignment < (1<<NBITS). 306 // and extra <= alignment 307 // therefore size + extra < (1<<NBITS) 308 void* result = mmap(NULL, size + extra, 309 PROT_READ|PROT_WRITE, 310 MAP_PRIVATE|MAP_ANONYMOUS, 311 -1, 0); 312 if (result == reinterpret_cast<void*>(MAP_FAILED)) { 313 return NULL; 314 } 315 316 // Adjust the return memory so it is aligned 317 uintptr_t ptr = reinterpret_cast<uintptr_t>(result); 318 size_t adjust = 0; 319 if ((ptr & (alignment - 1)) != 0) { 320 adjust = alignment - (ptr & (alignment - 1)); 321 } 322 323 // Return the unused memory to the system 324 if (adjust > 0) { 325 munmap(reinterpret_cast<void*>(ptr), adjust); 326 } 327 if (adjust < extra) { 328 munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust); 329 } 330 331 ptr += adjust; 332 return reinterpret_cast<void*>(ptr); 333#endif // HAVE_MMAP 334} 335 336void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, 337 size_t alignment) { 338#ifndef HAVE_MMAP 339 failed_ = true; 340 return NULL; 341#else 342 static bool initialized = false; 343 static off_t physmem_base; // next physical memory address to allocate 344 static off_t physmem_limit; // maximum physical address allowed 345 static int physmem_fd; // file descriptor for /dev/mem 346 347 // Check if we should use /dev/mem allocation. Note that it may take 348 // a while to get this flag initialized, so meanwhile we fall back to 349 // the next allocator. (It looks like 7MB gets allocated before 350 // this flag gets initialized -khr.) 351 if (FLAGS_malloc_devmem_start == 0) { 352 // NOTE: not a devmem_failure - we'd like TCMalloc_SystemAlloc to 353 // try us again next time. 
354 return NULL; 355 } 356 357 if (!initialized) { 358 physmem_fd = open("/dev/mem", O_RDWR); 359 if (physmem_fd < 0) { 360 return NULL; 361 } 362 physmem_base = FLAGS_malloc_devmem_start*1024LL*1024LL; 363 physmem_limit = FLAGS_malloc_devmem_limit*1024LL*1024LL; 364 initialized = true; 365 } 366 367 // Enforce page alignment 368 if (pagesize == 0) pagesize = getpagesize(); 369 if (alignment < pagesize) alignment = pagesize; 370 size_t aligned_size = ((size + alignment - 1) / alignment) * alignment; 371 if (aligned_size < size) { 372 return NULL; 373 } 374 size = aligned_size; 375 376 // "actual_size" indicates that the bytes from the returned pointer 377 // p up to and including (p + actual_size - 1) have been allocated. 378 if (actual_size) { 379 *actual_size = size; 380 } 381 382 // Ask for extra memory if alignment > pagesize 383 size_t extra = 0; 384 if (alignment > pagesize) { 385 extra = alignment - pagesize; 386 } 387 388 // check to see if we have any memory left 389 if (physmem_limit != 0 && 390 ((size + extra) > (physmem_limit - physmem_base))) { 391 return NULL; 392 } 393 394 // Note: size + extra does not overflow since: 395 // size + alignment < (1<<NBITS). 
396 // and extra <= alignment 397 // therefore size + extra < (1<<NBITS) 398 void *result = mmap(0, size + extra, PROT_WRITE|PROT_READ, 399 MAP_SHARED, physmem_fd, physmem_base); 400 if (result == reinterpret_cast<void*>(MAP_FAILED)) { 401 return NULL; 402 } 403 uintptr_t ptr = reinterpret_cast<uintptr_t>(result); 404 405 // Adjust the return memory so it is aligned 406 size_t adjust = 0; 407 if ((ptr & (alignment - 1)) != 0) { 408 adjust = alignment - (ptr & (alignment - 1)); 409 } 410 411 // Return the unused virtual memory to the system 412 if (adjust > 0) { 413 munmap(reinterpret_cast<void*>(ptr), adjust); 414 } 415 if (adjust < extra) { 416 munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust); 417 } 418 419 ptr += adjust; 420 physmem_base += adjust + size; 421 422 return reinterpret_cast<void*>(ptr); 423#endif // HAVE_MMAP 424} 425 426void* DefaultSysAllocator::Alloc(size_t size, size_t *actual_size, 427 size_t alignment) { 428 for (int i = 0; i < kMaxAllocators; i++) { 429 if (!failed_[i] && allocs_[i] != NULL) { 430 void* result = allocs_[i]->Alloc(size, actual_size, alignment); 431 if (result != NULL) { 432 return result; 433 } 434 failed_[i] = true; 435 } 436 } 437 // After both failed, reset "failed_" to false so that a single failed 438 // allocation won't make the allocator never work again. 439 for (int i = 0; i < kMaxAllocators; i++) { 440 failed_[i] = false; 441 } 442 return NULL; 443} 444 445static bool system_alloc_inited = false; 446void InitSystemAllocators(void) { 447 MmapSysAllocator *mmap = new (mmap_space) MmapSysAllocator(); 448 SbrkSysAllocator *sbrk = new (sbrk_space) SbrkSysAllocator(); 449 450 // In 64-bit debug mode, place the mmap allocator first since it 451 // allocates pointers that do not fit in 32 bits and therefore gives 452 // us better testing of code's 64-bit correctness. It also leads to 453 // less false negatives in heap-checking code. 
(Numbers are less 454 // likely to look like pointers and therefore the conservative gc in 455 // the heap-checker is less likely to misinterpret a number as a 456 // pointer). 457 DefaultSysAllocator *sdef = new (default_space) DefaultSysAllocator(); 458 if (kDebugMode && sizeof(void*) > 4) { 459 sdef->SetChildAllocator(mmap, 0, mmap_name); 460 sdef->SetChildAllocator(sbrk, 1, sbrk_name); 461 } else { 462 sdef->SetChildAllocator(sbrk, 0, sbrk_name); 463 sdef->SetChildAllocator(mmap, 1, mmap_name); 464 } 465 sys_alloc = sdef; 466} 467 468void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, 469 size_t alignment) { 470 // Discard requests that overflow 471 if (size + alignment < size) return NULL; 472 473 SpinLockHolder lock_holder(&spinlock); 474 475 if (!system_alloc_inited) { 476 InitSystemAllocators(); 477 system_alloc_inited = true; 478 } 479 480 // Enforce minimum alignment 481 if (alignment < sizeof(MemoryAligner)) alignment = sizeof(MemoryAligner); 482 483 void* result = sys_alloc->Alloc(size, actual_size, alignment); 484 if (result != NULL) { 485 if (actual_size) { 486 CheckAddressBits<kAddressBits>( 487 reinterpret_cast<uintptr_t>(result) + *actual_size - 1); 488 } else { 489 CheckAddressBits<kAddressBits>( 490 reinterpret_cast<uintptr_t>(result) + size - 1); 491 } 492 } 493 return result; 494} 495 496void TCMalloc_SystemRelease(void* start, size_t length) { 497#ifdef MADV_FREE 498 if (FLAGS_malloc_devmem_start) { 499 // It's not safe to use MADV_FREE/MADV_DONTNEED if we've been 500 // mapping /dev/mem for heap memory. 
501 return; 502 } 503 if (pagesize == 0) pagesize = getpagesize(); 504 const size_t pagemask = pagesize - 1; 505 506 size_t new_start = reinterpret_cast<size_t>(start); 507 size_t end = new_start + length; 508 size_t new_end = end; 509 510 // Round up the starting address and round down the ending address 511 // to be page aligned: 512 new_start = (new_start + pagesize - 1) & ~pagemask; 513 new_end = new_end & ~pagemask; 514 515 ASSERT((new_start & pagemask) == 0); 516 ASSERT((new_end & pagemask) == 0); 517 ASSERT(new_start >= reinterpret_cast<size_t>(start)); 518 ASSERT(new_end <= end); 519 520 if (new_end > new_start) { 521 // Note -- ignoring most return codes, because if this fails it 522 // doesn't matter... 523 while (madvise(reinterpret_cast<char*>(new_start), new_end - new_start, 524 MADV_FREE) == -1 && 525 errno == EAGAIN) { 526 // NOP 527 } 528 } 529#endif 530} 531