1// Copyright (c) 2005, 2007, Google Inc. 2// All rights reserved. 3// Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011 Apple Inc. All rights reserved. 4// 5// Redistribution and use in source and binary forms, with or without 6// modification, are permitted provided that the following conditions are 7// met: 8// 9// * Redistributions of source code must retain the above copyright 10// notice, this list of conditions and the following disclaimer. 11// * Redistributions in binary form must reproduce the above 12// copyright notice, this list of conditions and the following disclaimer 13// in the documentation and/or other materials provided with the 14// distribution. 15// * Neither the name of Google Inc. nor the names of its 16// contributors may be used to endorse or promote products derived from 17// this software without specific prior written permission. 18// 19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31// --- 32// Author: Sanjay Ghemawat <opensource@google.com> 33// 34// A malloc that uses a per-thread cache to satisfy small malloc requests. 35// (The time for malloc/free of a small object drops from 300 ns to 50 ns.) 
36// 37// See doc/tcmalloc.html for a high-level 38// description of how this malloc works. 39// 40// SYNCHRONIZATION 41// 1. The thread-specific lists are accessed without acquiring any locks. 42// This is safe because each such list is only accessed by one thread. 43// 2. We have a lock per central free-list, and hold it while manipulating 44// the central free list for a particular size. 45// 3. The central page allocator is protected by "pageheap_lock". 46// 4. The pagemap (which maps from page-number to descriptor), 47// can be read without holding any locks, and written while holding 48// the "pageheap_lock". 49// 5. To improve performance, a subset of the information one can get 50// from the pagemap is cached in a data structure, pagemap_cache_, 51// that atomically reads and writes its entries. This cache can be 52// read and written without locking. 53// 54// This multi-threaded access to the pagemap is safe for fairly 55// subtle reasons. We basically assume that when an object X is 56// allocated by thread A and deallocated by thread B, there must 57// have been appropriate synchronization in the handoff of object 58// X from thread A to thread B. The same logic applies to pagemap_cache_. 59// 60// THE PAGEID-TO-SIZECLASS CACHE 61// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache 62// returns 0 for a particular PageID then that means "no information," not that 63// the sizeclass is 0. The cache may have stale information for pages that do 64// not hold the beginning of any free()'able object. Staleness is eliminated 65// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and 66// do_memalign() for all other relevant pages. 67// 68// TODO: Bias reclamation to larger addresses 69// TODO: implement mallinfo/mallopt 70// TODO: Better testing 71// 72// 9/28/2003 (new page-level allocator replaces ptmalloc2): 73// * malloc/free of small objects goes from ~300 ns to ~50 ns. 
74// * allocation of a reasonably complicated struct 75// goes from about 1100 ns to about 300 ns. 76 77#include "config.h" 78#include "wtf/FastMalloc.h" 79 80#include "wtf/Assertions.h" 81#include "wtf/CPU.h" 82#include "wtf/StdLibExtras.h" 83 84#if OS(MACOSX) 85#include <AvailabilityMacros.h> 86#endif 87 88#include <limits> 89#if OS(WIN) 90#include <windows.h> 91#else 92#include <pthread.h> 93#endif 94#include <stdlib.h> 95#include <string.h> 96 97#ifndef NO_TCMALLOC_SAMPLES 98#define NO_TCMALLOC_SAMPLES 99#endif 100 101#if !USE(SYSTEM_MALLOC) && defined(NDEBUG) 102#define FORCE_SYSTEM_MALLOC 0 103#else 104#define FORCE_SYSTEM_MALLOC 1 105#endif 106 107// Harden the pointers stored in the TCMalloc linked lists 108#if COMPILER(GCC) 109#define ENABLE_TCMALLOC_HARDENING 1 110#endif 111 112// Use a background thread to periodically scavenge memory to release back to the system 113#define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1 114 115#ifndef NDEBUG 116namespace WTF { 117 118#if OS(WIN) 119 120// TLS_OUT_OF_INDEXES is not defined on WinCE. 121#ifndef TLS_OUT_OF_INDEXES 122#define TLS_OUT_OF_INDEXES 0xffffffff 123#endif 124 125static DWORD isForibiddenTlsIndex = TLS_OUT_OF_INDEXES; 126static const LPVOID kTlsAllowValue = reinterpret_cast<LPVOID>(0); // Must be zero. 127static const LPVOID kTlsForbiddenValue = reinterpret_cast<LPVOID>(1); 128 129#if !ASSERT_DISABLED 130static bool isForbidden() 131{ 132 // By default, fastMalloc is allowed so we don't allocate the 133 // tls index unless we're asked to make it forbidden. If TlsSetValue 134 // has not been called on a thread, the value returned by TlsGetValue is 0. 
135 return (isForibiddenTlsIndex != TLS_OUT_OF_INDEXES) && (TlsGetValue(isForibiddenTlsIndex) == kTlsForbiddenValue); 136} 137#endif 138 139void fastMallocForbid() 140{ 141 if (isForibiddenTlsIndex == TLS_OUT_OF_INDEXES) 142 isForibiddenTlsIndex = TlsAlloc(); // a little racey, but close enough for debug only 143 TlsSetValue(isForibiddenTlsIndex, kTlsForbiddenValue); 144} 145 146void fastMallocAllow() 147{ 148 if (isForibiddenTlsIndex == TLS_OUT_OF_INDEXES) 149 return; 150 TlsSetValue(isForibiddenTlsIndex, kTlsAllowValue); 151} 152 153#else // !OS(WIN) 154 155static pthread_key_t isForbiddenKey; 156static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT; 157static void initializeIsForbiddenKey() 158{ 159 pthread_key_create(&isForbiddenKey, 0); 160} 161 162#if !ASSERT_DISABLED 163static bool isForbidden() 164{ 165 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); 166 return !!pthread_getspecific(isForbiddenKey); 167} 168#endif 169 170void fastMallocForbid() 171{ 172 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); 173 pthread_setspecific(isForbiddenKey, &isForbiddenKey); 174} 175 176void fastMallocAllow() 177{ 178 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); 179 pthread_setspecific(isForbiddenKey, 0); 180} 181#endif // OS(WIN) 182 183} // namespace WTF 184#endif // NDEBUG 185 186namespace WTF { 187 188void* fastZeroedMalloc(size_t n) 189{ 190 void* result = fastMalloc(n); 191 memset(result, 0, n); 192 return result; 193} 194 195char* fastStrDup(const char* src) 196{ 197 size_t len = strlen(src) + 1; 198 char* dup = static_cast<char*>(fastMalloc(len)); 199 memcpy(dup, src, len); 200 return dup; 201} 202 203} // namespace WTF 204 205#if FORCE_SYSTEM_MALLOC 206 207#if OS(MACOSX) 208#include <malloc/malloc.h> 209#elif OS(WIN) 210#include <malloc.h> 211#endif 212 213namespace WTF { 214 215void* fastMalloc(size_t n) 216{ 217 ASSERT(!isForbidden()); 218 219 void* result = malloc(n); 220 ASSERT(result); // We expect tcmalloc 
underneath, which would crash instead of getting here. 221 222 return result; 223} 224 225void* fastCalloc(size_t n_elements, size_t element_size) 226{ 227 ASSERT(!isForbidden()); 228 229 void* result = calloc(n_elements, element_size); 230 ASSERT(result); // We expect tcmalloc underneath, which would crash instead of getting here. 231 232 return result; 233} 234 235void fastFree(void* p) 236{ 237 ASSERT(!isForbidden()); 238 239 free(p); 240} 241 242void* fastRealloc(void* p, size_t n) 243{ 244 ASSERT(!isForbidden()); 245 246 void* result = realloc(p, n); 247 ASSERT(result); // We expect tcmalloc underneath, which would crash instead of getting here. 248 249 return result; 250} 251 252void releaseFastMallocFreeMemory() { } 253 254FastMallocStatistics fastMallocStatistics() 255{ 256 FastMallocStatistics statistics = { 0, 0, 0 }; 257 return statistics; 258} 259 260} // namespace WTF 261 262#if OS(MACOSX) 263// This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled. 264// It will never be used in this case, so it's type and value are less interesting than its presence. 
265extern "C" const int jscore_fastmalloc_introspection = 0; 266#endif 267 268#else // FORCE_SYSTEM_MALLOC 269 270#include "Compiler.h" 271#include "TCPackedCache.h" 272#include "TCPageMap.h" 273#include "TCSpinLock.h" 274#include "TCSystemAlloc.h" 275#include <algorithm> 276#include <errno.h> 277#include <pthread.h> 278#include <stdarg.h> 279#include <stddef.h> 280#if OS(POSIX) 281#include <unistd.h> 282#endif 283#if OS(WIN) 284#ifndef WIN32_LEAN_AND_MEAN 285#define WIN32_LEAN_AND_MEAN 286#endif 287#include <windows.h> 288#endif 289 290#if OS(MACOSX) 291#include "MallocZoneSupport.h" 292#include "wtf/HashSet.h" 293#include "wtf/Vector.h" 294#else 295#include "wtf/CurrentTime.h" 296#endif 297 298#if HAVE(DISPATCH_H) 299#include <dispatch/dispatch.h> 300#endif 301 302#ifndef PRIuS 303#define PRIuS "zu" 304#endif 305 306// Calling pthread_getspecific through a global function pointer is faster than a normal 307// call to the function on Mac OS X, and it's used in performance-critical code. So we 308// use a function pointer. But that's not necessarily faster on other platforms, and we had 309// problems with this technique on Windows, so we'll do this only on Mac OS X. 
310#if OS(MACOSX) 311static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific; 312#define pthread_getspecific(key) pthread_getspecific_function_pointer(key) 313#endif 314 315#define DEFINE_VARIABLE(type, name, value, meaning) \ 316 namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ 317 type FLAGS_##name(value); \ 318 char FLAGS_no##name; \ 319 } \ 320 using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name 321 322#define DEFINE_int64(name, value, meaning) \ 323 DEFINE_VARIABLE(int64_t, name, value, meaning) 324 325#define DEFINE_double(name, value, meaning) \ 326 DEFINE_VARIABLE(double, name, value, meaning) 327 328namespace WTF { 329 330#define malloc fastMalloc 331#define calloc fastCalloc 332#define free fastFree 333#define realloc fastRealloc 334 335#define MESSAGE WTF_LOG_ERROR 336#define CHECK_CONDITION ASSERT 337 338#if !OS(MACOSX) 339static const char kLLHardeningMask = 0; 340#endif 341 342template <unsigned> struct EntropySource; 343template <> struct EntropySource<4> { 344 static uint32_t value() 345 { 346#if OS(MACOSX) 347 return arc4random(); 348#else 349 return static_cast<uint32_t>(static_cast<uintptr_t>(currentTime() * 10000) ^ reinterpret_cast<uintptr_t>(&kLLHardeningMask)); 350#endif 351 } 352}; 353 354template <> struct EntropySource<8> { 355 static uint64_t value() 356 { 357 return EntropySource<4>::value() | (static_cast<uint64_t>(EntropySource<4>::value()) << 32); 358 } 359}; 360 361#if ENABLE(TCMALLOC_HARDENING) 362/* 363 * To make it harder to exploit use-after free style exploits 364 * we mask the addresses we put into our linked lists with the 365 * address of kLLHardeningMask. Due to ASLR the address of 366 * kLLHardeningMask should be sufficiently randomized to make direct 367 * freelist manipulation much more difficult. 
368 */ 369enum { 370 MaskKeyShift = 13 371}; 372 373static ALWAYS_INLINE uintptr_t internalEntropyValue() 374{ 375 static uintptr_t value = EntropySource<sizeof(uintptr_t)>::value() | 1; 376 ASSERT(value); 377 return value; 378} 379 380#define HARDENING_ENTROPY internalEntropyValue() 381#define ROTATE_VALUE(value, amount) (((value) >> (amount)) | ((value) << (sizeof(value) * 8 - (amount)))) 382#define XOR_MASK_PTR_WITH_KEY(ptr, key, entropy) (reinterpret_cast<typeof(ptr)>(reinterpret_cast<uintptr_t>(ptr)^(ROTATE_VALUE(reinterpret_cast<uintptr_t>(key), MaskKeyShift)^entropy))) 383 384 385static ALWAYS_INLINE uint32_t freedObjectStartPoison() 386{ 387 static uint32_t value = EntropySource<sizeof(uint32_t)>::value() | 1; 388 ASSERT(value); 389 return value; 390} 391 392static ALWAYS_INLINE uint32_t freedObjectEndPoison() 393{ 394 static uint32_t value = EntropySource<sizeof(uint32_t)>::value() | 1; 395 ASSERT(value); 396 return value; 397} 398 399#define PTR_TO_UINT32(ptr) static_cast<uint32_t>(reinterpret_cast<uintptr_t>(ptr)) 400#define END_POISON_INDEX(allocationSize) (((allocationSize) - sizeof(uint32_t)) / sizeof(uint32_t)) 401#define POISON_ALLOCATION(allocation, allocationSize) do { \ 402 ASSERT((allocationSize) >= 2 * sizeof(uint32_t)); \ 403 reinterpret_cast<uint32_t*>(allocation)[0] = 0xbadbeef1; \ 404 reinterpret_cast<uint32_t*>(allocation)[1] = 0xbadbeef3; \ 405 if ((allocationSize) < 4 * sizeof(uint32_t)) \ 406 break; \ 407 reinterpret_cast<uint32_t*>(allocation)[2] = 0xbadbeef5; \ 408 reinterpret_cast<uint32_t*>(allocation)[END_POISON_INDEX(allocationSize)] = 0xbadbeef7; \ 409} while (false); 410 411#define POISON_DEALLOCATION_EXPLICIT(allocation, allocationSize, startPoison, endPoison) do { \ 412 ASSERT((allocationSize) >= 2 * sizeof(uint32_t)); \ 413 reinterpret_cast<uint32_t*>(allocation)[0] = 0xbadbeef9; \ 414 reinterpret_cast<uint32_t*>(allocation)[1] = 0xbadbeefb; \ 415 if ((allocationSize) < 4 * sizeof(uint32_t)) \ 416 break; \ 417 
reinterpret_cast<uint32_t*>(allocation)[2] = (startPoison) ^ PTR_TO_UINT32(allocation); \ 418 reinterpret_cast<uint32_t*>(allocation)[END_POISON_INDEX(allocationSize)] = (endPoison) ^ PTR_TO_UINT32(allocation); \ 419} while (false) 420 421#define POISON_DEALLOCATION(allocation, allocationSize) \ 422 POISON_DEALLOCATION_EXPLICIT(allocation, (allocationSize), freedObjectStartPoison(), freedObjectEndPoison()) 423 424#define MAY_BE_POISONED(allocation, allocationSize) (((allocationSize) >= 4 * sizeof(uint32_t)) && ( \ 425 (reinterpret_cast<uint32_t*>(allocation)[2] == (freedObjectStartPoison() ^ PTR_TO_UINT32(allocation))) || \ 426 (reinterpret_cast<uint32_t*>(allocation)[END_POISON_INDEX(allocationSize)] == (freedObjectEndPoison() ^ PTR_TO_UINT32(allocation))) \ 427)) 428 429#define IS_DEFINITELY_POISONED(allocation, allocationSize) (((allocationSize) < 4 * sizeof(uint32_t)) || ( \ 430 (reinterpret_cast<uint32_t*>(allocation)[2] == (freedObjectStartPoison() ^ PTR_TO_UINT32(allocation))) && \ 431 (reinterpret_cast<uint32_t*>(allocation)[END_POISON_INDEX(allocationSize)] == (freedObjectEndPoison() ^ PTR_TO_UINT32(allocation))) \ 432)) 433 434#else 435 436#define POISON_ALLOCATION(allocation, allocationSize) 437#define POISON_DEALLOCATION(allocation, allocationSize) 438#define POISON_DEALLOCATION_EXPLICIT(allocation, allocationSize, startPoison, endPoison) 439#define MAY_BE_POISONED(allocation, allocationSize) (false) 440#define IS_DEFINITELY_POISONED(allocation, allocationSize) (true) 441#define XOR_MASK_PTR_WITH_KEY(ptr, key, entropy) (((void)entropy), ((void)key), ptr) 442 443#define HARDENING_ENTROPY 0 444 445#endif 446 447//------------------------------------------------------------------- 448// Configuration 449//------------------------------------------------------------------- 450 451// Not all possible combinations of the following parameters make 452// sense. In particular, if kMaxSize increases, you may have to 453// increase kNumClasses as well. 
454static const size_t kPageShift = 12; 455static const size_t kPageSize = 1 << kPageShift; 456static const size_t kMaxSize = 8u * kPageSize; 457static const size_t kAlignShift = 3; 458static const size_t kAlignment = 1 << kAlignShift; 459static const size_t kNumClasses = 68; 460 461// Allocates a big block of memory for the pagemap once we reach more than 462// 128MB 463static const size_t kPageMapBigAllocationThreshold = 128 << 20; 464 465// Minimum number of pages to fetch from system at a time. Must be 466// significantly bigger than kPageSize to amortize system-call 467// overhead, and also to reduce external fragementation. Also, we 468// should keep this value big because various incarnations of Linux 469// have small limits on the number of mmap() regions per 470// address-space. 471static const size_t kMinSystemAlloc = 1 << (20 - kPageShift); 472 473// Number of objects to move between a per-thread list and a central 474// list in one shot. We want this to be not too small so we can 475// amortize the lock overhead for accessing the central list. Making 476// it too big may temporarily cause unnecessary memory wastage in the 477// per-thread free list until the scavenger cleans up the list. 478static int num_objects_to_move[kNumClasses]; 479 480// Maximum length we allow a per-thread free-list to have before we 481// move objects from it into the corresponding central free-list. We 482// want this big to avoid locking the central free-list too often. It 483// should not hurt to make this list somewhat big because the 484// scavenging code will shrink it down when its contents are not in use. 
485static const int kMaxFreeListLength = 256; 486 487// Lower and upper bounds on the per-thread cache sizes 488static const size_t kMinThreadCacheSize = kMaxSize * 2; 489static const size_t kMaxThreadCacheSize = 2 << 20; 490 491// Default bound on the total amount of thread caches 492static const size_t kDefaultOverallThreadCacheSize = 16 << 20; 493 494// For all span-lengths < kMaxPages we keep an exact-size list. 495// REQUIRED: kMaxPages >= kMinSystemAlloc; 496static const size_t kMaxPages = kMinSystemAlloc; 497 498/* The smallest prime > 2^n */ 499static int primes_list[] = { 500 // Small values might cause high rates of sampling 501 // and hence commented out. 502 // 2, 5, 11, 17, 37, 67, 131, 257, 503 // 521, 1031, 2053, 4099, 8209, 16411, 504 32771, 65537, 131101, 262147, 524309, 1048583, 505 2097169, 4194319, 8388617, 16777259, 33554467 }; 506 507// Twice the approximate gap between sampling actions. 508// I.e., we take one sample approximately once every 509// tcmalloc_sample_parameter/2 510// bytes of allocation, i.e., ~ once every 128KB. 511// Must be a prime number. 512#ifdef NO_TCMALLOC_SAMPLES 513DEFINE_int64(tcmalloc_sample_parameter, 0, 514 "Unused: code is compiled with NO_TCMALLOC_SAMPLES"); 515static size_t sample_period = 0; 516#else 517DEFINE_int64(tcmalloc_sample_parameter, 262147, 518 "Twice the approximate gap between sampling actions." 519 " Must be a prime number. Otherwise will be rounded up to a " 520 " larger prime number"); 521static size_t sample_period = 262147; 522#endif 523 524// Protects sample_period above 525static SpinLock sample_period_lock = SPINLOCK_INITIALIZER; 526 527// Parameters for controlling how fast memory is returned to the OS. 528 529DEFINE_double(tcmalloc_release_rate, 1, 530 "Rate at which we release unused memory to the system. " 531 "Zero means we never release memory back to the system. " 532 "Increase this flag to return memory faster; decrease it " 533 "to return memory slower. 
Reasonable rates are in the " 534 "range [0,10]"); 535 536//------------------------------------------------------------------- 537// Mapping from size to size_class and vice versa 538//------------------------------------------------------------------- 539 540// Sizes <= 1024 have an alignment >= 8. So for such sizes we have an 541// array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128. 542// So for these larger sizes we have an array indexed by ceil(size/128). 543// 544// We flatten both logical arrays into one physical array and use 545// arithmetic to compute an appropriate index. The constants used by 546// ClassIndex() were selected to make the flattening work. 547// 548// Examples: 549// Size Expression Index 550// ------------------------------------------------------- 551// 0 (0 + 7) / 8 0 552// 1 (1 + 7) / 8 1 553// ... 554// 1024 (1024 + 7) / 8 128 555// 1025 (1025 + 127 + (120<<7)) / 128 129 556// ... 557// 32768 (32768 + 127 + (120<<7)) / 128 376 558static const size_t kMaxSmallSize = 1024; 559static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128 560static const int add_amount[2] = { 7, 127 + (120 << 7) }; 561static unsigned char class_array[377]; 562 563// Compute index of the class_array[] entry for a given size 564static inline int ClassIndex(size_t s) { 565 const int i = (s > kMaxSmallSize); 566 return static_cast<int>((s + add_amount[i]) >> shift_amount[i]); 567} 568 569// Mapping from size class to max size storable in that class 570static size_t class_to_size[kNumClasses]; 571 572// Mapping from size class to number of pages to allocate at a time 573static size_t class_to_pages[kNumClasses]; 574 575// Hardened singly linked list. 
We make this a class to allow compiler to 576// statically prevent mismatching hardened and non-hardened list 577class HardenedSLL { 578public: 579 static ALWAYS_INLINE HardenedSLL create(void* value) 580 { 581 HardenedSLL result; 582 result.m_value = value; 583 return result; 584 } 585 586 static ALWAYS_INLINE HardenedSLL null() 587 { 588 HardenedSLL result; 589 result.m_value = 0; 590 return result; 591 } 592 593 ALWAYS_INLINE void setValue(void* value) { m_value = value; } 594 ALWAYS_INLINE void* value() const { return m_value; } 595 ALWAYS_INLINE bool operator!() const { return !m_value; } 596 typedef void* (HardenedSLL::*UnspecifiedBoolType); 597 ALWAYS_INLINE operator UnspecifiedBoolType() const { return m_value ? &HardenedSLL::m_value : 0; } 598 599 bool operator!=(const HardenedSLL& other) const { return m_value != other.m_value; } 600 bool operator==(const HardenedSLL& other) const { return m_value == other.m_value; } 601 602private: 603 void* m_value; 604}; 605 606// TransferCache is used to cache transfers of num_objects_to_move[size_class] 607// back and forth between thread caches and the central cache for a given size 608// class. 609struct TCEntry { 610 HardenedSLL head; // Head of chain of objects. 611 HardenedSLL tail; // Tail of chain of objects. 612}; 613// A central cache freelist can have anywhere from 0 to kNumTransferEntries 614// slots to put link list chains into. To keep memory usage bounded the total 615// number of TCEntries across size classes is fixed. Currently each size 616// class is initially given one TCEntry which also means that the maximum any 617// one class can have is kNumClasses. 618static const int kNumTransferEntries = kNumClasses; 619 620// Note: the following only works for "n"s that fit in 32-bits, but 621// that is fine since we only use it for small sizes. 
622static inline int LgFloor(size_t n) { 623 int log = 0; 624 for (int i = 4; i >= 0; --i) { 625 int shift = (1 << i); 626 size_t x = n >> shift; 627 if (x != 0) { 628 n = x; 629 log += shift; 630 } 631 } 632 ASSERT(n == 1); 633 return log; 634} 635 636// Functions for using our simple hardened singly linked list 637static ALWAYS_INLINE HardenedSLL SLL_Next(HardenedSLL t, uintptr_t entropy) { 638 return HardenedSLL::create(XOR_MASK_PTR_WITH_KEY(*(reinterpret_cast<void**>(t.value())), t.value(), entropy)); 639} 640 641static ALWAYS_INLINE void SLL_SetNext(HardenedSLL t, HardenedSLL n, uintptr_t entropy) { 642 *(reinterpret_cast<void**>(t.value())) = XOR_MASK_PTR_WITH_KEY(n.value(), t.value(), entropy); 643} 644 645static ALWAYS_INLINE void SLL_Push(HardenedSLL* list, HardenedSLL element, uintptr_t entropy) { 646 SLL_SetNext(element, *list, entropy); 647 *list = element; 648} 649 650static ALWAYS_INLINE HardenedSLL SLL_Pop(HardenedSLL *list, uintptr_t entropy) { 651 HardenedSLL result = *list; 652 *list = SLL_Next(*list, entropy); 653 return result; 654} 655 656// Remove N elements from a linked list to which head points. head will be 657// modified to point to the new head. start and end will point to the first 658// and last nodes of the range. Note that end will point to NULL after this 659// function is called. 660 661static ALWAYS_INLINE void SLL_PopRange(HardenedSLL* head, int N, HardenedSLL *start, HardenedSLL *end, uintptr_t entropy) { 662 if (N == 0) { 663 *start = HardenedSLL::null(); 664 *end = HardenedSLL::null(); 665 return; 666 } 667 668 HardenedSLL tmp = *head; 669 for (int i = 1; i < N; ++i) { 670 tmp = SLL_Next(tmp, entropy); 671 } 672 673 *start = *head; 674 *end = tmp; 675 *head = SLL_Next(tmp, entropy); 676 // Unlink range from list. 
677 SLL_SetNext(tmp, HardenedSLL::null(), entropy); 678} 679 680static ALWAYS_INLINE void SLL_PushRange(HardenedSLL *head, HardenedSLL start, HardenedSLL end, uintptr_t entropy) { 681 if (!start) return; 682 SLL_SetNext(end, *head, entropy); 683 *head = start; 684} 685 686// Setup helper functions. 687 688static ALWAYS_INLINE size_t SizeClass(size_t size) { 689 return class_array[ClassIndex(size)]; 690} 691 692// Get the byte-size for a specified class 693static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) { 694 return class_to_size[cl]; 695} 696static int NumMoveSize(size_t size) { 697 if (size == 0) return 0; 698 // Use approx 64k transfers between thread and central caches. 699 int num = static_cast<int>(64.0 * 1024.0 / size); 700 if (num < 2) num = 2; 701 // Clamp well below kMaxFreeListLength to avoid ping pong between central 702 // and thread caches. 703 if (num > static_cast<int>(0.8 * kMaxFreeListLength)) 704 num = static_cast<int>(0.8 * kMaxFreeListLength); 705 706 // Also, avoid bringing in too many objects into small object free 707 // lists. There are lots of such lists, and if we allow each one to 708 // fetch too many at a time, we end up having to scavenge too often 709 // (especially when there are lots of threads and each thread gets a 710 // small allowance for its thread cache). 711 // 712 // TODO: Make thread cache free list sizes dynamic so that we do not 713 // have to equally divide a fixed resource amongst lots of threads. 
714 if (num > 32) num = 32; 715 716 return num; 717} 718 719// Initialize the mapping arrays 720static void InitSizeClasses() { 721 // Do some sanity checking on add_amount[]/shift_amount[]/class_array[] 722 if (ClassIndex(0) < 0) { 723 MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0)); 724 CRASH(); 725 } 726 if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) { 727 MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize)); 728 CRASH(); 729 } 730 731 // Compute the size classes we want to use 732 size_t sc = 1; // Next size class to assign 733 unsigned char alignshift = kAlignShift; 734 int last_lg = -1; 735 for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) { 736 int lg = LgFloor(size); 737 if (lg > last_lg) { 738 // Increase alignment every so often. 739 // 740 // Since we double the alignment every time size doubles and 741 // size >= 128, this means that space wasted due to alignment is 742 // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256 743 // bytes, so the space wasted as a percentage starts falling for 744 // sizes > 2K. 745 if ((lg >= 7) && (alignshift < 8)) { 746 alignshift++; 747 } 748 last_lg = lg; 749 } 750 751 // Allocate enough pages so leftover is less than 1/8 of total. 752 // This bounds wasted space to at most 12.5%. 753 size_t psize = kPageSize; 754 while ((psize % size) > (psize >> 3)) { 755 psize += kPageSize; 756 } 757 const size_t my_pages = psize >> kPageShift; 758 759 if (sc > 1 && my_pages == class_to_pages[sc-1]) { 760 // See if we can merge this into the previous class without 761 // increasing the fragmentation of the previous class. 
762 const size_t my_objects = (my_pages << kPageShift) / size; 763 const size_t prev_objects = (class_to_pages[sc-1] << kPageShift) 764 / class_to_size[sc-1]; 765 if (my_objects == prev_objects) { 766 // Adjust last class to include this size 767 class_to_size[sc-1] = size; 768 continue; 769 } 770 } 771 772 // Add new class 773 class_to_pages[sc] = my_pages; 774 class_to_size[sc] = size; 775 sc++; 776 } 777 if (sc != kNumClasses) { 778 MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n", 779 sc, int(kNumClasses)); 780 CRASH(); 781 } 782 783 // Initialize the mapping arrays 784 int next_size = 0; 785 for (unsigned char c = 1; c < kNumClasses; c++) { 786 const size_t max_size_in_class = class_to_size[c]; 787 for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) { 788 class_array[ClassIndex(s)] = c; 789 } 790 next_size = static_cast<int>(max_size_in_class + kAlignment); 791 } 792 793 // Double-check sizes just to be safe 794 for (size_t size = 0; size <= kMaxSize; size++) { 795 const size_t sc = SizeClass(size); 796 if (sc == 0) { 797 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size); 798 CRASH(); 799 } 800 if (sc > 1 && size <= class_to_size[sc-1]) { 801 MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS 802 "\n", sc, size); 803 CRASH(); 804 } 805 if (sc >= kNumClasses) { 806 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size); 807 CRASH(); 808 } 809 const size_t s = class_to_size[sc]; 810 if (size > s) { 811 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc); 812 CRASH(); 813 } 814 if (s == 0) { 815 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc); 816 CRASH(); 817 } 818 } 819 820 // Initialize the num_objects_to_move array. 
821 for (size_t cl = 1; cl < kNumClasses; ++cl) { 822 num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl)); 823 } 824} 825 826// ------------------------------------------------------------------------- 827// Simple allocator for objects of a specified type. External locking 828// is required before accessing one of these objects. 829// ------------------------------------------------------------------------- 830 831// Metadata allocator -- keeps stats about how many bytes allocated 832static uint64_t metadata_system_bytes = 0; 833static void* MetaDataAlloc(size_t bytes) { 834 void* result = TCMalloc_SystemAlloc(bytes, 0); 835 if (result != NULL) { 836 metadata_system_bytes += bytes; 837 } 838 return result; 839} 840 841template <class T> 842class PageHeapAllocator { 843 private: 844 // How much to allocate from system at a time 845 static const size_t kAllocIncrement = 32 << 10; 846 847 // Aligned size of T 848 static const size_t kAlignedSize 849 = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment); 850 851 // Free area from which to carve new objects 852 char* free_area_; 853 size_t free_avail_; 854 855 // Linked list of all regions allocated by this allocator 856 HardenedSLL allocated_regions_; 857 858 // Free list of already carved objects 859 HardenedSLL free_list_; 860 861 // Number of allocated but unfreed objects 862 int inuse_; 863 uintptr_t entropy_; 864 865 public: 866 void Init(uintptr_t entropy) { 867 ASSERT(kAlignedSize <= kAllocIncrement); 868 inuse_ = 0; 869 allocated_regions_ = HardenedSLL::null(); 870 free_area_ = NULL; 871 free_avail_ = 0; 872 free_list_.setValue(NULL); 873 entropy_ = entropy; 874 } 875 876 T* New() { 877 // Consult free list 878 void* result; 879 if (free_list_) { 880 result = free_list_.value(); 881 free_list_ = SLL_Next(free_list_, entropy_); 882 } else { 883 if (free_avail_ < kAlignedSize) { 884 // Need more room 885 char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); 886 if 
(!new_allocation) 887 CRASH(); 888 889 HardenedSLL new_head = HardenedSLL::create(new_allocation); 890 SLL_SetNext(new_head, allocated_regions_, entropy_); 891 allocated_regions_ = new_head; 892 free_area_ = new_allocation + kAlignedSize; 893 free_avail_ = kAllocIncrement - kAlignedSize; 894 } 895 result = free_area_; 896 free_area_ += kAlignedSize; 897 free_avail_ -= kAlignedSize; 898 } 899 inuse_++; 900 return reinterpret_cast<T*>(result); 901 } 902 903 void Delete(T* p) { 904 HardenedSLL new_head = HardenedSLL::create(p); 905 SLL_SetNext(new_head, free_list_, entropy_); 906 free_list_ = new_head; 907 inuse_--; 908 } 909 910 int inuse() const { return inuse_; } 911 912#if OS(MACOSX) 913 template <class Recorder> 914 void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader) 915 { 916 for (HardenedSLL adminAllocation = allocated_regions_; adminAllocation; adminAllocation.setValue(reader.nextEntryInHardenedLinkedList(reinterpret_cast<void**>(adminAllocation.value()), entropy_))) 917 recorder.recordRegion(reinterpret_cast<vm_address_t>(adminAllocation.value()), kAllocIncrement); 918 } 919#endif 920}; 921 922// ------------------------------------------------------------------------- 923// Span - a contiguous run of pages 924// ------------------------------------------------------------------------- 925 926// Type that can hold a page number 927typedef uintptr_t PageID; 928 929// Type that can hold the length of a run of pages 930typedef uintptr_t Length; 931 932static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift; 933 934// Convert byte size into pages. This won't overflow, but may return 935// an unreasonably large value if bytes is huge enough. 936static inline Length pages(size_t bytes) { 937 return (bytes >> kPageShift) + 938 ((bytes & (kPageSize - 1)) > 0 ? 
1 : 0); 939} 940 941// Convert a user size into the number of bytes that will actually be 942// allocated 943static size_t AllocationSize(size_t bytes) { 944 if (bytes > kMaxSize) { 945 // Large object: we allocate an integral number of pages 946 ASSERT(bytes <= (kMaxValidPages << kPageShift)); 947 return pages(bytes) << kPageShift; 948 } else { 949 // Small object: find the size class to which it belongs 950 return ByteSizeForClass(SizeClass(bytes)); 951 } 952} 953 954enum { 955 kSpanCookieBits = 10, 956 kSpanCookieMask = (1 << 10) - 1, 957 kSpanThisShift = 7 958}; 959 960static uint32_t spanValidationCookie; 961static uint32_t spanInitializerCookie() 962{ 963 static uint32_t value = EntropySource<sizeof(uint32_t)>::value() & kSpanCookieMask; 964 spanValidationCookie = value; 965 return value; 966} 967 968// Information kept for a span (a contiguous run of pages). 969struct Span { 970 PageID start; // Starting page number 971 Length length; // Number of pages in span 972 Span* next(uintptr_t entropy) const { return XOR_MASK_PTR_WITH_KEY(m_next, this, entropy); } 973 Span* remoteNext(const Span* remoteSpanPointer, uintptr_t entropy) const { return XOR_MASK_PTR_WITH_KEY(m_next, remoteSpanPointer, entropy); } 974 Span* prev(uintptr_t entropy) const { return XOR_MASK_PTR_WITH_KEY(m_prev, this, entropy); } 975 void setNext(Span* next, uintptr_t entropy) { m_next = XOR_MASK_PTR_WITH_KEY(next, this, entropy); } 976 void setPrev(Span* prev, uintptr_t entropy) { m_prev = XOR_MASK_PTR_WITH_KEY(prev, this, entropy); } 977 978private: 979 Span* m_next; // Used when in link list 980 Span* m_prev; // Used when in link list 981public: 982 HardenedSLL objects; // Linked list of free objects 983 unsigned int free : 1; // Is the span free 984#ifndef NO_TCMALLOC_SAMPLES 985 unsigned int sample : 1; // Sampled object? 
986#endif 987 unsigned int sizeclass : 8; // Size-class for small objects (or 0) 988 unsigned int refcount : 11; // Number of non-free objects 989 bool decommitted : 1; 990 void initCookie() 991 { 992 m_cookie = ((reinterpret_cast<uintptr_t>(this) >> kSpanThisShift) & kSpanCookieMask) ^ spanInitializerCookie(); 993 } 994 void clearCookie() { m_cookie = 0; } 995 bool isValid() const 996 { 997 return (((reinterpret_cast<uintptr_t>(this) >> kSpanThisShift) & kSpanCookieMask) ^ m_cookie) == spanValidationCookie; 998 } 999private: 1000 uint32_t m_cookie : kSpanCookieBits; 1001 1002#undef SPAN_HISTORY 1003#ifdef SPAN_HISTORY 1004 // For debugging, we can keep a log events per span 1005 int nexthistory; 1006 char history[64]; 1007 int value[64]; 1008#endif 1009}; 1010 1011#define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted) 1012 1013#ifdef SPAN_HISTORY 1014void Event(Span* span, char op, int v = 0) { 1015 span->history[span->nexthistory] = op; 1016 span->value[span->nexthistory] = v; 1017 span->nexthistory++; 1018 if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0; 1019} 1020#else 1021#define Event(s,o,v) ((void) 0) 1022#endif 1023 1024// Allocator/deallocator for spans 1025static PageHeapAllocator<Span> span_allocator; 1026static Span* NewSpan(PageID p, Length len) { 1027 Span* result = span_allocator.New(); 1028 memset(result, 0, sizeof(*result)); 1029 result->start = p; 1030 result->length = len; 1031 result->initCookie(); 1032#ifdef SPAN_HISTORY 1033 result->nexthistory = 0; 1034#endif 1035 return result; 1036} 1037 1038static inline void DeleteSpan(Span* span) { 1039 RELEASE_ASSERT(span->isValid()); 1040#ifndef NDEBUG 1041 // In debug mode, trash the contents of deleted Spans 1042 memset(span, 0x3f, sizeof(*span)); 1043#endif 1044 span->clearCookie(); 1045 span_allocator.Delete(span); 1046} 1047 1048// ------------------------------------------------------------------------- 1049// Doubly linked list of spans. 
// -------------------------------------------------------------------------

// Make `list` an empty circular list (header points at itself).
static inline void DLL_Init(Span* list, uintptr_t entropy) {
  list->setNext(list, entropy);
  list->setPrev(list, entropy);
}

// Unlink `span` from whatever list it is on; its links are NULLed out.
static inline void DLL_Remove(Span* span, uintptr_t entropy) {
  span->prev(entropy)->setNext(span->next(entropy), entropy);
  span->next(entropy)->setPrev(span->prev(entropy), entropy);
  span->setPrev(NULL, entropy);
  span->setNext(NULL, entropy);
}

static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list, uintptr_t entropy) {
  return list->next(entropy) == list;
}

// O(n) count of the spans on `list` (excluding the header).
static int DLL_Length(const Span* list, uintptr_t entropy) {
  int result = 0;
  for (Span* s = list->next(entropy); s != list; s = s->next(entropy)) {
    result++;
  }
  return result;
}

#if 0 /* Not needed at the moment -- causes compiler warnings if not used */
static void DLL_Print(const char* label, const Span* list) {
  MESSAGE("%-10s %p:", label, list);
  for (const Span* s = list->next; s != list; s = s->next) {
    MESSAGE(" <%p,%u,%u>", s, s->start, s->length);
  }
  MESSAGE("\n");
}
#endif

// Insert `span` at the head of `list` (right after the header).
static inline void DLL_Prepend(Span* list, Span* span, uintptr_t entropy) {
  span->setNext(list->next(entropy), entropy);
  span->setPrev(list, entropy);
  list->next(entropy)->setPrev(span, entropy);
  list->setNext(span, entropy);
}

//-------------------------------------------------------------------
// Data kept per size-class in central cache
//-------------------------------------------------------------------

class TCMalloc_Central_FreeList {
 public:
  void Init(size_t cl, uintptr_t entropy);

  // These methods all do internal locking.

  // Insert the specified range into the central freelist.  N is the number of
  // elements in the range.
  void InsertRange(HardenedSLL start, HardenedSLL end, int N);

  // Returns the actual number of fetched elements into N.
  void RemoveRange(HardenedSLL* start, HardenedSLL* end, int *N);

  // Returns the number of free objects in cache.
  size_t length() {
    SpinLockHolder h(&lock_);
    return counter_;
  }

  // Returns the number of free objects in the transfer cache.
  int tc_length() {
    SpinLockHolder h(&lock_);
    return used_slots_ * num_objects_to_move[size_class_];
  }

  // Walk this free list as it exists in another process's address space
  // (`remoteCentralFreeList` is the remote address of this object; `reader`
  // maps remote pointers into locally readable memory) and report every free
  // object on every non-empty span to `finder`.  Used by the malloc zone
  // introspection code on OS X.
  template <class Finder, class Reader>
  void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList)
  {
    {
      // First pass: assert that spans on the empty_ list really carry no
      // free objects.
      // NOTE(review): this walk seeds remoteSpan from nonempty_'s link while
      // iterating the empty_ list -- looks inconsistent with the second walk
      // below; confirm against upstream before relying on it.
      static const ptrdiff_t emptyOffset = reinterpret_cast<const char*>(&empty_) - reinterpret_cast<const char*>(this);
      Span* remoteEmpty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + emptyOffset);
      Span* remoteSpan = nonempty_.remoteNext(remoteEmpty, entropy_);
      for (Span* span = reader(remoteEmpty); span && span != &empty_; remoteSpan = span->remoteNext(remoteSpan, entropy_), span = (remoteSpan ? reader(remoteSpan) : 0))
        ASSERT(!span->objects);
    }

    ASSERT(!nonempty_.objects);
    static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this);

    // Second pass: visit each free object hanging off each span on the
    // nonempty_ list, decoding the hardened links with entropy_.
    Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset);
    Span* remoteSpan = nonempty_.remoteNext(remoteNonempty, entropy_);

    for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->remoteNext(remoteSpan, entropy_), span = (remoteSpan ? reader(remoteSpan) : 0)) {
      for (HardenedSLL nextObject = span->objects; nextObject; nextObject.setValue(reader.nextEntryInHardenedLinkedList(reinterpret_cast<void**>(nextObject.value()), entropy_))) {
        finder.visit(nextObject.value());
      }
    }
  }

  uintptr_t entropy() const { return entropy_; }
 private:
  // REQUIRES: lock_ is held
  // Remove object from cache and return.
  // Return NULL if no free entries in cache.
  HardenedSLL FetchFromSpans();

  // REQUIRES: lock_ is held
  // Remove object from cache and return.  Fetches
  // from pageheap if cache is empty.  Only returns
  // NULL on allocation failure.
  HardenedSLL FetchFromSpansSafe();

  // REQUIRES: lock_ is held
  // Release a linked list of objects to spans.
  // May temporarily release lock_.
  void ReleaseListToSpans(HardenedSLL start);

  // REQUIRES: lock_ is held
  // Release an object to spans.
  // May temporarily release lock_.
  ALWAYS_INLINE void ReleaseToSpans(HardenedSLL object);

  // REQUIRES: lock_ is held
  // Populate cache by fetching from the page heap.
  // May temporarily release lock_.
  ALWAYS_INLINE void Populate();

  // REQUIRES: lock is held.
  // Tries to make room for a TCEntry.  If the cache is full it will try to
  // expand it at the cost of some other cache size.  Return false if there is
  // no space.
  bool MakeCacheSpace();

  // REQUIRES: lock_ for locked_size_class is held.
  // Picks a "random" size class to steal TCEntry slot from.  In reality it
  // just iterates over the sizeclasses but does so without taking a lock.
  // Returns true on success.
  // May temporarily lock a "random" size class.
  static ALWAYS_INLINE bool EvictRandomSizeClass(size_t locked_size_class, bool force);

  // REQUIRES: lock_ is *not* held.
  // Tries to shrink the Cache.  If force is true it will release objects to
  // spans if it allows it to shrink the cache.  Return false if it failed to
  // shrink the cache.  Decrements cache_size_ on success.
  // May temporarily take lock_.  If it takes lock_, the locked_size_class
  // lock is released to keep the thread from holding two size class locks
  // concurrently, which could lead to a deadlock.
  bool ShrinkCache(int locked_size_class, bool force);

  // This lock protects all the data members.  cached_entries and cache_size_
  // may be looked at without holding the lock.
  SpinLock lock_;

  // We keep linked lists of empty and non-empty spans.
  size_t size_class_;     // My size class
  Span empty_;            // Dummy header for list of empty spans
  Span nonempty_;         // Dummy header for list of non-empty spans
  size_t counter_;        // Number of free objects in cache entry

  // Here we reserve space for TCEntry cache slots.  Since one size class can
  // end up getting all the TCEntries quota in the system we just preallocate
  // sufficient number of entries here.
  TCEntry tc_slots_[kNumTransferEntries];

  // Number of currently used cached entries in tc_slots_.  This variable is
  // updated under a lock but can be read without one.
  int32_t used_slots_;
  // The current number of slots for this size class.  This is an
  // adaptive value that is increased if there is lots of traffic
  // on a given size class.
  int32_t cache_size_;

  // Key for XOR-masking the hardened free-list pointers.
  uintptr_t entropy_;
};

#if COMPILER(CLANG) && defined(__has_warning)
#pragma clang diagnostic push
#if __has_warning("-Wunused-private-field")
#pragma clang diagnostic ignored "-Wunused-private-field"
#endif
#endif

// Pad each CentralCache object to multiple of 64 bytes
template <size_t SizeToPad>
class TCMalloc_Central_FreeListPadded_Template : public TCMalloc_Central_FreeList {
private:
  char pad[64 - SizeToPad];
};

// Zero-size specialization to avoid compiler error when TCMalloc_Central_FreeList happens
// to be exactly 64 bytes.
template <> class TCMalloc_Central_FreeListPadded_Template<0> : public TCMalloc_Central_FreeList {
};

typedef TCMalloc_Central_FreeListPadded_Template<sizeof(TCMalloc_Central_FreeList) % 64> TCMalloc_Central_FreeListPadded;

#if COMPILER(CLANG) && defined(__has_warning)
#pragma clang diagnostic pop
#endif

#if OS(MACOSX)
struct Span;
class TCMalloc_PageHeap;
class TCMalloc_ThreadCache;
template <typename T> class PageHeapAllocator;

// Adapter that exposes this allocator to the Darwin malloc_zone_t
// introspection interface.
class FastMallocZone {
public:
    static void init();

    static kern_return_t enumerate(task_t, void*, unsigned typeMmask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t);
    static size_t goodSize(malloc_zone_t*, size_t size) { return size; }
    static boolean_t check(malloc_zone_t*) { return true; }
    static void  print(malloc_zone_t*, boolean_t) { }
    static void log(malloc_zone_t*, void*) { }
    static void forceLock(malloc_zone_t*) { }
    static void forceUnlock(malloc_zone_t*) { }
    static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); }

private:
    FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*);

    static size_t size(malloc_zone_t*, const void*);
    static void* zoneMalloc(malloc_zone_t*, size_t);
    static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size);
    static void zoneFree(malloc_zone_t*, void*);
    static void* zoneRealloc(malloc_zone_t*, void*, size_t);
    static void* zoneValloc(malloc_zone_t*, size_t) { WTF_LOG_ERROR("valloc is not supported"); return 0; }
    static void zoneDestroy(malloc_zone_t*) { }

    malloc_zone_t m_zone;
    TCMalloc_PageHeap* m_pageHeap;
    TCMalloc_ThreadCache** m_threadHeaps;
    TCMalloc_Central_FreeListPadded* m_centralCaches;
    PageHeapAllocator<Span>* m_spanAllocator;
    PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator;
};

#endif

// Even if we have support for thread-local storage in the compiler
// and linker, the OS may not support it.  We need to check that at
// runtime.  Right now, we have to keep a manual set of "bad" OSes.
#if defined(HAVE_TLS)
  static bool kernel_supports_tls = false;      // be conservative
  static inline bool KernelSupportsTLS() {
    return kernel_supports_tls;
  }
# if !HAVE_DECL_UNAME   // if too old for uname, probably too old for TLS
    static void CheckIfKernelSupportsTLS() {
      kernel_supports_tls = false;
    }
# else
#   include <sys/utsname.h>    // DECL_UNAME checked for <sys/utsname.h> too
    // Probe the running kernel version: Linux kernels before 2.6 are treated
    // as lacking TLS support; anything else is assumed to have it.
    static void CheckIfKernelSupportsTLS() {
      struct utsname buf;
      if (uname(&buf) != 0) {   // should be impossible
        MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno);
        kernel_supports_tls = false;
      } else if (strcasecmp(buf.sysname, "linux") == 0) {
        // The linux case: the first kernel to support TLS was 2.6.0
        if (buf.release[0] < '2' && buf.release[1] == '.')    // 0.x or 1.x
          kernel_supports_tls = false;
        else if (buf.release[0] == '2' && buf.release[1] == '.' &&
                 buf.release[2] >= '0' && buf.release[2] < '6' &&
                 buf.release[3] == '.')                       // 2.0 - 2.5
          kernel_supports_tls = false;
        else
          kernel_supports_tls = true;
      } else {        // some other kernel, we'll be optimistic
        kernel_supports_tls = true;
      }
      // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
    }
# endif  // HAVE_DECL_UNAME
#endif  // HAVE_TLS

// __THROW is defined in glibc systems.  It means, counter-intuitively,
// "This function will never throw an exception."  It's an optional
// optimization tool, but we may need to use it to match glibc prototypes.
#ifndef __THROW    // I guess we're not on a glibc system
# define __THROW   // __THROW is just an optimization, so ok to make it ""
#endif

// -------------------------------------------------------------------------
// Stack traces kept for sampled allocations
// The following state is protected by pageheap_lock_.
// -------------------------------------------------------------------------

// size/depth are made the same size as a pointer so that some generic
// code below can conveniently cast them back and forth to void*.
static const int kMaxStackDepth = 31;
struct StackTrace {
  uintptr_t size;          // Size of object
  uintptr_t depth;         // Number of PC values stored in array below
  void* stack[kMaxStackDepth];
};
static PageHeapAllocator<StackTrace> stacktrace_allocator;
static Span sampled_objects;

// -------------------------------------------------------------------------
// Map from page-id to per-page data
// -------------------------------------------------------------------------

// We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
// We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
// because sometimes the sizeclass is all the information we need.

// Selector class -- general selector uses 3-level map
// Chooses the pagemap and pagemap-cache implementations appropriate for the
// pointer width given as BITS (see the 32- and 64-bit specializations below).
template <int BITS> class MapSelector {
 public:
  typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
  typedef PackedCache<BITS, uint64_t> CacheType;
};

#if CPU(X86_64)
// On all known X86-64 platforms, the upper 16 bits are always unused and therefore
// can be excluded from the PageMap key.
// See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details

static const size_t kBitsUnusedOn64Bit = 16;
#else
static const size_t kBitsUnusedOn64Bit = 0;
#endif

// A three-level map for 64-bit machines
template <> class MapSelector<64> {
 public:
  typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type;
  typedef PackedCache<64, uint64_t> CacheType;
};

// A two-level map for 32-bit machines
template <> class MapSelector<32> {
 public:
  typedef TCMalloc_PageMap2<32 - kPageShift> Type;
  typedef PackedCache<32 - kPageShift, uint16_t> CacheType;
};

// -------------------------------------------------------------------------
// Page-level allocator
//  * Eager coalescing
//
// Heap for page-level allocation.  We allow allocating and freeing a
// contiguous runs of pages (called a "span").
// -------------------------------------------------------------------------

#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
// The page heap maintains a free list for spans that are no longer in use by
// the central cache or any thread caches. We use a background thread to
// periodically scan the free list and release a percentage of it back to the OS.
1396 1397// If free_committed_pages_ exceeds kMinimumFreeCommittedPageCount, the 1398// background thread: 1399// - wakes up 1400// - pauses for kScavengeDelayInSeconds 1401// - returns to the OS a percentage of the memory that remained unused during 1402// that pause (kScavengePercentage * min_free_committed_pages_since_last_scavenge_) 1403// The goal of this strategy is to reduce memory pressure in a timely fashion 1404// while avoiding thrashing the OS allocator. 1405 1406// Time delay before the page heap scavenger will consider returning pages to 1407// the OS. 1408static const int kScavengeDelayInSeconds = 2; 1409 1410// Approximate percentage of free committed pages to return to the OS in one 1411// scavenge. 1412static const float kScavengePercentage = .5f; 1413 1414// number of span lists to keep spans in when memory is returned. 1415static const int kMinSpanListsWithSpans = 32; 1416 1417// Number of free committed pages that we want to keep around. The minimum number of pages used when there 1418// is 1 span in each of the first kMinSpanListsWithSpans spanlists. Currently 528 pages. 1419static const size_t kMinimumFreeCommittedPageCount = kMinSpanListsWithSpans * ((1.0f+kMinSpanListsWithSpans) / 2.0f); 1420 1421#endif 1422 1423static SpinLock pageheap_lock = SPINLOCK_INITIALIZER; 1424 1425class TCMalloc_PageHeap { 1426 public: 1427 void init(); 1428 1429 // Allocate a run of "n" pages. Returns zero if out of memory. 1430 Span* New(Length n); 1431 1432 // Delete the span "[p, p+n-1]". 1433 // REQUIRES: span was returned by earlier call to New() and 1434 // has not yet been deleted. 1435 void Delete(Span* span); 1436 1437 // Mark an allocated span as being used for small objects of the 1438 // specified size-class. 1439 // REQUIRES: span was returned by an earlier call to New() 1440 // and has not yet been deleted. 
1441 void RegisterSizeClass(Span* span, size_t sc); 1442 1443 // Split an allocated span into two spans: one of length "n" pages 1444 // followed by another span of length "span->length - n" pages. 1445 // Modifies "*span" to point to the first span of length "n" pages. 1446 // Returns a pointer to the second span. 1447 // 1448 // REQUIRES: "0 < n < span->length" 1449 // REQUIRES: !span->free 1450 // REQUIRES: span->sizeclass == 0 1451 Span* Split(Span* span, Length n); 1452 1453 // Return the descriptor for the specified page. 1454 inline Span* GetDescriptor(PageID p) const { 1455 return reinterpret_cast<Span*>(pagemap_.get(p)); 1456 } 1457 1458 inline Span* GetDescriptorEnsureSafe(PageID p) 1459 { 1460 pagemap_.Ensure(p, 1); 1461 return GetDescriptor(p); 1462 } 1463 1464 size_t ReturnedBytes() const; 1465 1466 // Return number of bytes allocated from system 1467 inline uint64_t SystemBytes() const { return system_bytes_; } 1468 1469 // Return number of free bytes in heap 1470 uint64_t FreeBytes() const { 1471 return (static_cast<uint64_t>(free_pages_) << kPageShift); 1472 } 1473 1474 bool Check(); 1475 size_t CheckList(Span* list, Length min_pages, Length max_pages, bool decommitted); 1476 1477 // Release all pages on the free list for reuse by the OS: 1478 void ReleaseFreePages(); 1479 void ReleaseFreeList(Span*, Span*); 1480 1481 // Return 0 if we have no information, or else the correct sizeclass for p. 1482 // Reads and writes to pagemap_cache_ do not require locking. 1483 // The entries are 64 bits on 64-bit hardware and 16 bits on 1484 // 32-bit hardware, and we don't mind raciness as long as each read of 1485 // an entry yields a valid entry, not a partially updated entry. 
1486 size_t GetSizeClassIfCached(PageID p) const { 1487 return pagemap_cache_.GetOrDefault(p, 0); 1488 } 1489 void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); } 1490 1491 private: 1492 // Pick the appropriate map and cache types based on pointer size 1493 typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap; 1494 typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache; 1495 PageMap pagemap_; 1496 mutable PageMapCache pagemap_cache_; 1497 1498 // We segregate spans of a given size into two circular linked 1499 // lists: one for normal spans, and one for spans whose memory 1500 // has been returned to the system. 1501 struct SpanList { 1502 Span normal; 1503 Span returned; 1504 }; 1505 1506 // List of free spans of length >= kMaxPages 1507 SpanList large_; 1508 1509 // Array mapping from span length to a doubly linked list of free spans 1510 SpanList free_[kMaxPages]; 1511 1512 // Number of pages kept in free lists 1513 uintptr_t free_pages_; 1514 1515 // Used for hardening 1516 uintptr_t entropy_; 1517 1518 // Bytes allocated from system 1519 uint64_t system_bytes_; 1520 1521#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1522 // Number of pages kept in free lists that are still committed. 1523 Length free_committed_pages_; 1524 1525 // Minimum number of free committed pages since last scavenge. (Can be 0 if 1526 // we've committed new pages since the last scavenge.) 1527 Length min_free_committed_pages_since_last_scavenge_; 1528#endif 1529 1530 bool GrowHeap(Length n); 1531 1532 // REQUIRES span->length >= n 1533 // Remove span from its free list, and move any leftover part of 1534 // span into appropriate free lists. Also update "span" to have 1535 // length exactly "n" and mark it as non-free so it can be returned 1536 // to the client. 1537 // 1538 // "released" is true iff "span" was found on a "returned" list. 
1539 void Carve(Span* span, Length n, bool released); 1540 1541 void RecordSpan(Span* span) { 1542 pagemap_.set(span->start, span); 1543 if (span->length > 1) { 1544 pagemap_.set(span->start + span->length - 1, span); 1545 } 1546 } 1547 1548 // Allocate a large span of length == n. If successful, returns a 1549 // span of exactly the specified length. Else, returns NULL. 1550 Span* AllocLarge(Length n); 1551 1552#if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1553 // Incrementally release some memory to the system. 1554 // IncrementalScavenge(n) is called whenever n pages are freed. 1555 void IncrementalScavenge(Length n); 1556#endif 1557 1558 // Number of pages to deallocate before doing more scavenging 1559 int64_t scavenge_counter_; 1560 1561 // Index of last free list we scavenged 1562 size_t scavenge_index_; 1563 1564#if OS(MACOSX) 1565 friend class FastMallocZone; 1566#endif 1567 1568#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1569 void initializeScavenger(); 1570 ALWAYS_INLINE void signalScavenger(); 1571 void scavenge(); 1572 ALWAYS_INLINE bool shouldScavenge() const; 1573 1574#if HAVE(DISPATCH_H) || OS(WIN) 1575 void periodicScavenge(); 1576 ALWAYS_INLINE bool isScavengerSuspended(); 1577 ALWAYS_INLINE void scheduleScavenger(); 1578 ALWAYS_INLINE void rescheduleScavenger(); 1579 ALWAYS_INLINE void suspendScavenger(); 1580#endif 1581 1582#if HAVE(DISPATCH_H) 1583 dispatch_queue_t m_scavengeQueue; 1584 dispatch_source_t m_scavengeTimer; 1585 bool m_scavengingSuspended; 1586#elif OS(WIN) 1587 static void CALLBACK scavengerTimerFired(void*, BOOLEAN); 1588 HANDLE m_scavengeQueueTimer; 1589#else 1590 static NO_RETURN_WITH_VALUE void* runScavengerThread(void*); 1591 NO_RETURN void scavengerThread(); 1592 1593 // Keeps track of whether the background thread is actively scavenging memory every kScavengeDelayInSeconds, or 1594 // it's blocked waiting for more pages to be deleted. 
1595 bool m_scavengeThreadActive; 1596 1597 pthread_mutex_t m_scavengeMutex; 1598 pthread_cond_t m_scavengeCondition; 1599#endif 1600 1601#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1602}; 1603 1604void TCMalloc_PageHeap::init() 1605{ 1606 pagemap_.init(MetaDataAlloc); 1607 pagemap_cache_ = PageMapCache(0); 1608 free_pages_ = 0; 1609 system_bytes_ = 0; 1610 entropy_ = HARDENING_ENTROPY; 1611 1612#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1613 free_committed_pages_ = 0; 1614 min_free_committed_pages_since_last_scavenge_ = 0; 1615#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1616 1617 scavenge_counter_ = 0; 1618 // Start scavenging at kMaxPages list 1619 scavenge_index_ = kMaxPages-1; 1620 COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits); 1621 DLL_Init(&large_.normal, entropy_); 1622 DLL_Init(&large_.returned, entropy_); 1623 for (size_t i = 0; i < kMaxPages; i++) { 1624 DLL_Init(&free_[i].normal, entropy_); 1625 DLL_Init(&free_[i].returned, entropy_); 1626 } 1627 1628#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1629 initializeScavenger(); 1630#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1631} 1632 1633#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1634 1635#if HAVE(DISPATCH_H) 1636 1637void TCMalloc_PageHeap::initializeScavenger() 1638{ 1639 m_scavengeQueue = dispatch_queue_create("com.apple.JavaScriptCore.FastMallocSavenger", NULL); 1640 m_scavengeTimer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, m_scavengeQueue); 1641 uint64_t scavengeDelayInNanoseconds = kScavengeDelayInSeconds * NSEC_PER_SEC; 1642 dispatch_time_t startTime = dispatch_time(DISPATCH_TIME_NOW, scavengeDelayInNanoseconds); 1643 dispatch_source_set_timer(m_scavengeTimer, startTime, scavengeDelayInNanoseconds, scavengeDelayInNanoseconds / 10); 1644 dispatch_source_set_event_handler(m_scavengeTimer, ^{ periodicScavenge(); }); 1645 m_scavengingSuspended = true; 1646} 1647 1648ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended() 1649{ 1650 
ASSERT(pageheap_lock.IsHeld()); 1651 return m_scavengingSuspended; 1652} 1653 1654ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger() 1655{ 1656 ASSERT(pageheap_lock.IsHeld()); 1657 m_scavengingSuspended = false; 1658 dispatch_resume(m_scavengeTimer); 1659} 1660 1661ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger() 1662{ 1663 // Nothing to do here for libdispatch. 1664} 1665 1666ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger() 1667{ 1668 ASSERT(pageheap_lock.IsHeld()); 1669 m_scavengingSuspended = true; 1670 dispatch_suspend(m_scavengeTimer); 1671} 1672 1673#elif OS(WIN) 1674 1675void TCMalloc_PageHeap::scavengerTimerFired(void* context, BOOLEAN) 1676{ 1677 static_cast<TCMalloc_PageHeap*>(context)->periodicScavenge(); 1678} 1679 1680void TCMalloc_PageHeap::initializeScavenger() 1681{ 1682 m_scavengeQueueTimer = 0; 1683} 1684 1685ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended() 1686{ 1687 ASSERT(pageheap_lock.IsHeld()); 1688 return !m_scavengeQueueTimer; 1689} 1690 1691ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger() 1692{ 1693 // We need to use WT_EXECUTEONLYONCE here and reschedule the timer, because 1694 // Windows will fire the timer event even when the function is already running. 1695 ASSERT(pageheap_lock.IsHeld()); 1696 CreateTimerQueueTimer(&m_scavengeQueueTimer, 0, scavengerTimerFired, this, kScavengeDelayInSeconds * 1000, 0, WT_EXECUTEONLYONCE); 1697} 1698 1699ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger() 1700{ 1701 // We must delete the timer and create it again, because it is not possible to retrigger a timer on Windows. 
1702 suspendScavenger(); 1703 scheduleScavenger(); 1704} 1705 1706ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger() 1707{ 1708 ASSERT(pageheap_lock.IsHeld()); 1709 HANDLE scavengeQueueTimer = m_scavengeQueueTimer; 1710 m_scavengeQueueTimer = 0; 1711 DeleteTimerQueueTimer(0, scavengeQueueTimer, 0); 1712} 1713 1714#else 1715 1716void TCMalloc_PageHeap::initializeScavenger() 1717{ 1718 // Create a non-recursive mutex. 1719#if !defined(PTHREAD_MUTEX_NORMAL) || PTHREAD_MUTEX_NORMAL == PTHREAD_MUTEX_DEFAULT 1720 pthread_mutex_init(&m_scavengeMutex, 0); 1721#else 1722 pthread_mutexattr_t attr; 1723 pthread_mutexattr_init(&attr); 1724 pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL); 1725 1726 pthread_mutex_init(&m_scavengeMutex, &attr); 1727 1728 pthread_mutexattr_destroy(&attr); 1729#endif 1730 1731 pthread_cond_init(&m_scavengeCondition, 0); 1732 m_scavengeThreadActive = true; 1733 pthread_t thread; 1734 pthread_create(&thread, 0, runScavengerThread, this); 1735} 1736 1737void* TCMalloc_PageHeap::runScavengerThread(void* context) 1738{ 1739 static_cast<TCMalloc_PageHeap*>(context)->scavengerThread(); 1740#if COMPILER(MSVC) 1741 // Without this, Visual Studio will complain that this method does not return a value. 1742 return 0; 1743#endif 1744} 1745 1746ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger() 1747{ 1748 // shouldScavenge() should be called only when the pageheap_lock spinlock is held, additionally, 1749 // m_scavengeThreadActive is only set to false whilst pageheap_lock is held. The caller must ensure this is 1750 // taken prior to calling this method. If the scavenger thread is sleeping and shouldScavenge() indicates there 1751 // is memory to free the scavenger thread is signalled to start. 
1752 ASSERT(pageheap_lock.IsHeld()); 1753 if (!m_scavengeThreadActive && shouldScavenge()) 1754 pthread_cond_signal(&m_scavengeCondition); 1755} 1756 1757#endif 1758 1759void TCMalloc_PageHeap::scavenge() 1760{ 1761 size_t pagesToRelease = min_free_committed_pages_since_last_scavenge_ * kScavengePercentage; 1762 size_t targetPageCount = std::max<size_t>(kMinimumFreeCommittedPageCount, free_committed_pages_ - pagesToRelease); 1763 1764 Length lastFreeCommittedPages = free_committed_pages_; 1765 while (free_committed_pages_ > targetPageCount) { 1766 ASSERT(Check()); 1767 for (int i = kMaxPages; i > 0 && free_committed_pages_ >= targetPageCount; i--) { 1768 SpanList* slist = (static_cast<size_t>(i) == kMaxPages) ? &large_ : &free_[i]; 1769 // If the span size is bigger than kMinSpanListsWithSpans pages return all the spans in the list, else return all but 1 span. 1770 // Return only 50% of a spanlist at a time so spans of size 1 are not the only ones left. 1771 size_t length = DLL_Length(&slist->normal, entropy_); 1772 size_t numSpansToReturn = (i > kMinSpanListsWithSpans) ? 
// Continuation: right-hand side of the numSpansToReturn conditional begun above.
length : length / 2;
            for (int j = 0; static_cast<size_t>(j) < numSpansToReturn && !DLL_IsEmpty(&slist->normal, entropy_) && free_committed_pages_ > targetPageCount; j++) {
                // Take spans from the tail of the normal list, decommit them, and
                // move them to the returned list.
                Span* s = slist->normal.prev(entropy_);
                DLL_Remove(s, entropy_);
                ASSERT(!s->decommitted);
                if (!s->decommitted) {
                    TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
                                           static_cast<size_t>(s->length << kPageShift));
                    ASSERT(free_committed_pages_ >= s->length);
                    free_committed_pages_ -= s->length;
                    s->decommitted = true;
                }
                DLL_Prepend(&slist->returned, s, entropy_);
            }
        }

        // Guard against an infinite loop: stop if a full pass released nothing.
        if (lastFreeCommittedPages == free_committed_pages_)
            break;
        lastFreeCommittedPages = free_committed_pages_;
    }

    min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
}

// True when there are more committed free pages than the configured floor.
ALWAYS_INLINE bool TCMalloc_PageHeap::shouldScavenge() const
{
    return free_committed_pages_ > kMinimumFreeCommittedPageCount;
}

#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY

// Allocates a span of exactly n pages, growing the heap if necessary.
// Returns NULL only if the system refuses to provide more memory.
inline Span* TCMalloc_PageHeap::New(Length n) {
  ASSERT(Check());
  ASSERT(n > 0);

  // Find first size >= n that has a non-empty list
  for (Length s = n; s < kMaxPages; s++) {
    Span* ll = NULL;
    bool released = false;
    if (!DLL_IsEmpty(&free_[s].normal, entropy_)) {
      // Found normal span
      ll = &free_[s].normal;
    } else if (!DLL_IsEmpty(&free_[s].returned, entropy_)) {
      // Found returned span; reallocate it
      ll = &free_[s].returned;
      released = true;
    } else {
      // Keep looking in larger classes
      continue;
    }

    Span* result = ll->next(entropy_);
    // Carve() trims the span down to n pages and recommits it if released.
    Carve(result, n, released);
#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
    // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
    // free committed pages count.
// Continuation: bookkeeping inside New() after Carve().
    ASSERT(free_committed_pages_ >= n);
    free_committed_pages_ -= n;
    if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
      min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
    ASSERT(Check());
    free_pages_ -= n;
    return result;
  }

  // No exact-class list had a span; fall back to the large-span best-fit search.
  Span* result = AllocLarge(n);
  if (result != NULL) {
    ASSERT_SPAN_COMMITTED(result);
    return result;
  }

  // Grow the heap and try again
  if (!GrowHeap(n)) {
    ASSERT(Check());
    return NULL;
  }

  // GrowHeap() added fresh pages to the free lists; retry once recursively.
  return New(n);
}

Span* TCMalloc_PageHeap::AllocLarge(Length n) {
  // find the best span (closest to n in size).
  // The following loops implement address-ordered best-fit: ties on length are
  // broken by choosing the lower start address.
  bool from_released = false;
  Span *best = NULL;

  // Search through normal list
  for (Span* span = large_.normal.next(entropy_);
       span != &large_.normal;
       span = span->next(entropy_)) {
    if (span->length >= n) {
      if ((best == NULL)
          || (span->length < best->length)
          || ((span->length == best->length) && (span->start < best->start))) {
        best = span;
        from_released = false;
      }
    }
  }

  // Search through released list in case it has a better fit
  for (Span* span = large_.returned.next(entropy_);
       span != &large_.returned;
       span = span->next(entropy_)) {
    if (span->length >= n) {
      if ((best == NULL)
          || (span->length < best->length)
          || ((span->length == best->length) && (span->start < best->start))) {
        best = span;
        from_released = true;
      }
    }
  }

  if (best != NULL) {
    Carve(best, n, from_released);
#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
    // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
    // free committed pages count.
// Continuation: bookkeeping inside AllocLarge() after Carve().
    ASSERT(free_committed_pages_ >= n);
    free_committed_pages_ -= n;
    if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
      min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
    ASSERT(Check());
    free_pages_ -= n;
    return best;
  }
  return NULL;
}

// Splits an in-use span into [start, start+n) and a leftover span of the
// remaining pages; returns the leftover. The span must not be free and must not
// carry a sizeclass.
Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
  ASSERT(0 < n);
  ASSERT(n < span->length);
  ASSERT(!span->free);
  ASSERT(span->sizeclass == 0);
  Event(span, 'T', n);

  const Length extra = span->length - n;
  Span* leftover = NewSpan(span->start + n, extra);
  Event(leftover, 'U', extra);
  RecordSpan(leftover);
  pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
  span->length = n;

  return leftover;
}

// Removes span from its free list, marks it allocated, commits its memory if it
// came from the returned list, and puts any excess pages back on a free list.
inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
  ASSERT(n > 0);
  DLL_Remove(span, entropy_);
  span->free = 0;
  Event(span, 'A', n);

  if (released) {
    // If the span chosen to carve from is decommitted, commit the entire span at once to avoid committing spans 1 page at a time.
    ASSERT(span->decommitted);
    TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift), static_cast<size_t>(span->length << kPageShift));
    span->decommitted = false;
#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
    free_committed_pages_ += span->length;
#endif
  }

  const int extra = static_cast<int>(span->length - n);
  ASSERT(extra >= 0);
  if (extra > 0) {
    // The leftover is committed (we just committed the whole span above if
    // needed), so it always goes on a "normal" list.
    Span* leftover = NewSpan(span->start + n, extra);
    leftover->free = 1;
    leftover->decommitted = false;
    Event(leftover, 'S', extra);
    RecordSpan(leftover);

    // Place leftover span on appropriate free list
    SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ?
// Continuation: right-hand side of the listpair conditional begun above.
&free_[extra] : &large_;
    Span* dst = &listpair->normal;
    DLL_Prepend(dst, leftover, entropy_);

    span->length = n;
    pagemap_.set(span->start + n - 1, span);
  }
}

// Reconciles the decommitted state of two spans about to be merged: whichever
// side is still committed is released so the merged span is uniformly
// decommitted. If both sides agree, this is a no-op.
static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other)
{
    if (destination->decommitted && !other->decommitted) {
        TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift),
                               static_cast<size_t>(other->length << kPageShift));
    } else if (other->decommitted && !destination->decommitted) {
        TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift),
                               static_cast<size_t>(destination->length << kPageShift));
        destination->decommitted = true;
    }
}

// Returns a span to the page heap: coalesces with free neighbors, places the
// result on the appropriate normal/returned list, and triggers scavenging.
inline void TCMalloc_PageHeap::Delete(Span* span) {
  ASSERT(Check());
  ASSERT(!span->free);
  ASSERT(span->length > 0);
  ASSERT(GetDescriptor(span->start) == span);
  ASSERT(GetDescriptor(span->start + span->length - 1) == span);
  span->sizeclass = 0;
#ifndef NO_TCMALLOC_SAMPLES
  span->sample = 0;
#endif

  // Coalesce -- we guarantee that "p" != 0, so no bounds checking
  // necessary.  We do not bother resetting the stale pagemap
  // entries for the pieces we are merging together because we only
  // care about the pagemap entries for the boundaries.
#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
  // Track the total size of the neighboring free spans that are committed.
// Continuation: coalescing body of Delete().
  Length neighboringCommittedSpansLength = 0;
#endif
  const PageID p = span->start;
  const Length n = span->length;
  Span* prev = GetDescriptor(p-1);
  if (prev != NULL && prev->free) {
    // Merge preceding span into this span
    ASSERT(prev->start + prev->length == p);
    const Length len = prev->length;
#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
    if (!prev->decommitted)
        neighboringCommittedSpansLength += len;
#endif
    mergeDecommittedStates(span, prev);
    DLL_Remove(prev, entropy_);
    DeleteSpan(prev);
    span->start -= len;
    span->length += len;
    // Only the boundary pagemap entry needs updating (see comment above).
    pagemap_.set(span->start, span);
    Event(span, 'L', len);
  }
  Span* next = GetDescriptor(p+n);
  if (next != NULL && next->free) {
    // Merge next span into this span
    ASSERT(next->start == p+n);
    const Length len = next->length;
#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
    if (!next->decommitted)
        neighboringCommittedSpansLength += len;
#endif
    mergeDecommittedStates(span, next);
    DLL_Remove(next, entropy_);
    DeleteSpan(next);
    span->length += len;
    pagemap_.set(span->start + span->length - 1, span);
    Event(span, 'R', len);
  }

  Event(span, 'D', span->length);
  span->free = 1;
  // File the merged span by size and commit state.
  if (span->decommitted) {
    if (span->length < kMaxPages)
      DLL_Prepend(&free_[span->length].returned, span, entropy_);
    else
      DLL_Prepend(&large_.returned, span, entropy_);
  } else {
    if (span->length < kMaxPages)
      DLL_Prepend(&free_[span->length].normal, span, entropy_);
    else
      DLL_Prepend(&large_.normal, span, entropy_);
  }
  free_pages_ += n;

#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
  if (span->decommitted) {
    // If the merged span is decommitted, that means we decommitted any neighboring spans that were
    // committed.  Update the free committed pages count.
// Continuation: committed-page accounting at the end of Delete().
    free_committed_pages_ -= neighboringCommittedSpansLength;
    if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
      min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
  } else {
    // If the merged span remains committed, add the deleted span's size to the free committed pages count.
    free_committed_pages_ += n;
  }

  // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system.
  signalScavenger();
#else
  IncrementalScavenge(n);
#endif

  ASSERT(Check());
}

#if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
// Foreground scavenging: after enough pages have been freed (tracked by
// scavenge_counter_), release one span back to the system per call, cycling
// round-robin through the free lists via scavenge_index_.
void TCMalloc_PageHeap::IncrementalScavenge(Length n) {
  // Fast path; not yet time to release memory
  scavenge_counter_ -= n;
  if (scavenge_counter_ >= 0) return; // Not yet time to scavenge

  // If there is nothing to release, wait for so many pages before
  // scavenging again.  With 4K pages, this comes to 16MB of memory.
  static const size_t kDefaultReleaseDelay = 1 << 8;

  // Find index of free list to scavenge
  size_t index = scavenge_index_ + 1;
  uintptr_t entropy = entropy_;
  for (size_t i = 0; i < kMaxPages+1; i++) {
    if (index > kMaxPages) index = 0;
    // index == kMaxPages selects the large-span list.
    SpanList* slist = (index == kMaxPages) ?
// Continuation: right-hand side of the slist conditional begun above.
&large_ : &free_[index];
    if (!DLL_IsEmpty(&slist->normal, entropy)) {
      // Release the last span on the normal portion of this list
      Span* s = slist->normal.prev(entropy);
      DLL_Remove(s, entropy_);
      TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
                             static_cast<size_t>(s->length << kPageShift));
      s->decommitted = true;
      DLL_Prepend(&slist->returned, s, entropy);

      // Back off between releases: the more free pages remain, the sooner we
      // scavenge again (clamped to [64, kDefaultReleaseDelay]).
      scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay)));

      // Revisit the large list next time if it still has normal spans.
      if (index == kMaxPages && !DLL_IsEmpty(&slist->normal, entropy))
        scavenge_index_ = index - 1;
      else
        scavenge_index_ = index;
      return;
    }
    index++;
  }

  // Nothing to scavenge, delay for a while
  scavenge_counter_ = kDefaultReleaseDelay;
}
#endif

// Records span's sizeclass and points every interior page of the pagemap at it,
// so any pointer into the span resolves back to this descriptor.
void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
  // Associate span object with all interior pages as well
  ASSERT(!span->free);
  ASSERT(GetDescriptor(span->start) == span);
  ASSERT(GetDescriptor(span->start+span->length-1) == span);
  Event(span, 'C', sc);
  span->sizeclass = static_cast<unsigned int>(sc);
  for (Length i = 1; i < span->length-1; i++) {
    pagemap_.set(span->start+i, span);
  }
}

// Total bytes currently sitting on "returned" (decommitted) lists.
size_t TCMalloc_PageHeap::ReturnedBytes() const {
  size_t result = 0;
  for (unsigned s = 0; s < kMaxPages; s++) {
    const int r_length = DLL_Length(&free_[s].returned, entropy_);
    unsigned r_pages = s * r_length;
    result += r_pages << kPageShift;
  }

  for (Span* s = large_.returned.next(entropy_); s != &large_.returned; s = s->next(entropy_))
    result += s->length << kPageShift;
  return result;
}

// Asks the system for at least n pages (rounded up to kMinSystemAlloc) and adds
// them to the free lists. Returns false if the system is out of memory.
bool TCMalloc_PageHeap::GrowHeap(Length n) {
  ASSERT(kMaxPages >= kMinSystemAlloc);
  if (n > kMaxValidPages) return false;
  Length ask = (n>kMinSystemAlloc) ?
// Continuation: right-hand side of the ask conditional begun above.
n : static_cast<Length>(kMinSystemAlloc);
  size_t actual_size;
  void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
  if (ptr == NULL) {
    if (n < ask) {
      // Try growing just "n" pages
      ask = n;
      ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
    }
    if (ptr == NULL) return false;
  }
  // The system may hand back more than requested; account for what we got.
  ask = actual_size >> kPageShift;

  uint64_t old_system_bytes = system_bytes_;
  system_bytes_ += (ask << kPageShift);
  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  ASSERT(p > 0);

  // If we have already a lot of pages allocated, just pre allocate a bunch of
  // memory for the page map. This prevents fragmentation by pagemap metadata
  // when a program keeps allocating and freeing large blocks.

  if (old_system_bytes < kPageMapBigAllocationThreshold
      && system_bytes_ >= kPageMapBigAllocationThreshold) {
    pagemap_.PreallocateMoreMemory();
  }

  // Make sure pagemap_ has entries for all of the new pages.
  // Plus ensure one before and one after so coalescing code
  // does not need bounds-checking.
  if (pagemap_.Ensure(p-1, ask+2)) {
    // Pretend the new area is allocated and then Delete() it to
    // cause any necessary coalescing to occur.
    //
    // We do not adjust free_pages_ here since Delete() will do it for us.
// Continuation: success path at the end of GrowHeap().
    Span* span = NewSpan(p, ask);
    RecordSpan(span);
    Delete(span);
    ASSERT(Check());
    return true;
  } else {
    // We could not allocate memory within "pagemap_"
    // TODO: Once we can return memory to the system, return the new span
    return false;
  }
}

// Validates every free list (lengths, descriptors, commit state) and, when the
// background scavenger is enabled, cross-checks free_committed_pages_ against a
// recount. Always returns true; failures fire ASSERT/CHECK_CONDITION.
bool TCMalloc_PageHeap::Check() {
#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
  size_t totalFreeCommitted = 0;
#endif
  // The zero-length lists must always be empty.
  ASSERT(free_[0].normal.next(entropy_) == &free_[0].normal);
  ASSERT(free_[0].returned.next(entropy_) == &free_[0].returned);
#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
  totalFreeCommitted = CheckList(&large_.normal, kMaxPages, 1000000000, false);
#else
  CheckList(&large_.normal, kMaxPages, 1000000000, false);
#endif
  CheckList(&large_.returned, kMaxPages, 1000000000, true);
  for (Length s = 1; s < kMaxPages; s++) {
#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
    totalFreeCommitted += CheckList(&free_[s].normal, s, s, false);
#else
    CheckList(&free_[s].normal, s, s, false);
#endif
    CheckList(&free_[s].returned, s, s, true);
  }
#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
  ASSERT(totalFreeCommitted == free_committed_pages_);
#endif
  return true;
}

#if ASSERT_DISABLED
size_t TCMalloc_PageHeap::CheckList(Span*, Length, Length, bool) {
  return 0;
}
#else
// Walks one free list checking per-span invariants; returns the total number of
// pages on the list.
size_t TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages, bool decommitted) {
  size_t freeCount = 0;
  for (Span* s = list->next(entropy_); s != list; s = s->next(entropy_)) {
    CHECK_CONDITION(s->free);
    CHECK_CONDITION(s->length >= min_pages);
    CHECK_CONDITION(s->length <= max_pages);
    CHECK_CONDITION(GetDescriptor(s->start) == s);
    CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s);
    CHECK_CONDITION(s->decommitted == decommitted);
    freeCount += s->length;
  }
  return freeCount;
}
#endif

void
// Decommits every span on 'list' and moves it to 'returned', updating the
// committed-page accounting when the background scavenger is enabled.
TCMalloc_PageHeap::ReleaseFreeList(Span* list, Span* returned) {
  // Walk backwards through list so that when we push these
  // spans on the "returned" list, we preserve the order.
#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
  size_t freePageReduction = 0;
#endif

  while (!DLL_IsEmpty(list, entropy_)) {
    Span* s = list->prev(entropy_);

    DLL_Remove(s, entropy_);
    s->decommitted = true;
    DLL_Prepend(returned, s, entropy_);
    TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
                           static_cast<size_t>(s->length << kPageShift));
#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
    freePageReduction += s->length;
#endif
  }

#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
  free_committed_pages_ -= freePageReduction;
  if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
      min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
#endif
}

// Releases every free list (all size classes plus large) back to the system.
void TCMalloc_PageHeap::ReleaseFreePages() {
  for (Length s = 0; s < kMaxPages; s++) {
    ReleaseFreeList(&free_[s].normal, &free_[s].returned);
  }
  ReleaseFreeList(&large_.normal, &large_.returned);
  ASSERT(Check());
}

//-------------------------------------------------------------------
// Free list
//-------------------------------------------------------------------

// Singly-linked free list of same-sized objects held by a thread cache.
// Pointers are XOR-hardened with entropy_ (see HardenedSLL).
class TCMalloc_ThreadCache_FreeList {
 private:
  HardenedSLL list_;    // Linked list of nodes
  uint16_t length_;     // Current length
  uint16_t lowater_;    // Low water mark for list length
  uintptr_t entropy_;   // Entropy source for hardening

 public:
  void Init(uintptr_t entropy) {
    list_.setValue(NULL);
    length_ = 0;
    lowater_ = 0;
    entropy_ = entropy;
#if ENABLE(TCMALLOC_HARDENING)
    ASSERT(entropy_);
#endif
  }

  // Return current length of list
  int length() const {
    return length_;
  }

  // Is list empty?
// (comment concluded on the previous line)
  bool empty() const {
    return !list_;
  }

  // Low-water mark management
  int lowwatermark() const { return lowater_; }
  void clear_lowwatermark() { lowater_ = length_; }

  ALWAYS_INLINE void Push(HardenedSLL ptr) {
    SLL_Push(&list_, ptr, entropy_);
    length_++;
  }

  void PushRange(int N, HardenedSLL start, HardenedSLL end) {
    SLL_PushRange(&list_, start, end, entropy_);
    length_ = length_ + static_cast<uint16_t>(N);
  }

  void PopRange(int N, HardenedSLL* start, HardenedSLL* end) {
    SLL_PopRange(&list_, N, start, end, entropy_);
    ASSERT(length_ >= N);
    length_ = length_ - static_cast<uint16_t>(N);
    if (length_ < lowater_) lowater_ = length_;
  }

  ALWAYS_INLINE void* Pop() {
    ASSERT(list_);
    length_--;
    if (length_ < lowater_) lowater_ = length_;
    return SLL_Pop(&list_, entropy_).value();
  }

  // Runs through the linked list to ensure that
  // we can do that, and ensures that 'missing'
  // is not present
  NEVER_INLINE void Validate(HardenedSLL missing, size_t size) {
    HardenedSLL node = list_;
    while (node) {
      RELEASE_ASSERT(node != missing);
      RELEASE_ASSERT(IS_DEFINITELY_POISONED(node.value(), size));
      node = SLL_Next(node, entropy_);
    }
  }

  // Invokes finder.visit() on every free object, reading links through 'reader'
  // (used by out-of-process heap enumeration).
  template <class Finder, class Reader>
  void enumerateFreeObjects(Finder& finder, const Reader& reader)
  {
    for (HardenedSLL nextObject = list_; nextObject; nextObject.setValue(reader.nextEntryInHardenedLinkedList(reinterpret_cast<void**>(nextObject.value()), entropy_)))
      finder.visit(nextObject.value());
  }
};

//-------------------------------------------------------------------
// Data kept per thread
//-------------------------------------------------------------------

class TCMalloc_ThreadCache {
 private:
  typedef TCMalloc_ThreadCache_FreeList FreeList;
#if OS(WIN)
  typedef DWORD ThreadIdentifier;
#else
  typedef pthread_t ThreadIdentifier;
#endif

  size_t size_;                      // Combined size of data
  ThreadIdentifier tid_;             // Which thread owns it
  bool in_setspecific_;              // Called pthread_setspecific?
  FreeList list_[kNumClasses];       // Array indexed by size-class

  // We sample allocations, biased by the size of the allocation
  uint32_t rnd_;                     // Cheap random number generator
  size_t bytes_until_sample_;        // Bytes until we sample next

  uintptr_t entropy_;                // Entropy value used for hardening

  // Allocate a new heap. REQUIRES: pageheap_lock is held.
  static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid, uintptr_t entropy);

  // Use only as pthread thread-specific destructor function.
  static void DestroyThreadCache(void* ptr);
 public:
  // All ThreadCache objects are kept in a linked list (for stats collection)
  TCMalloc_ThreadCache* next_;
  TCMalloc_ThreadCache* prev_;

  void Init(ThreadIdentifier tid, uintptr_t entropy);
  void Cleanup();

  // Accessors (mostly just for printing stats)
  int freelist_length(size_t cl) const { return list_[cl].length(); }

  // Total byte size in cache
  size_t Size() const { return size_; }

  ALWAYS_INLINE void* Allocate(size_t size);
  void Deallocate(HardenedSLL ptr, size_t size_class);

  ALWAYS_INLINE void FetchFromCentralCache(size_t cl, size_t allocationSize);
  void ReleaseToCentralCache(size_t cl, int N);
  void Scavenge();
  void Print() const;

  // Record allocation of "k" bytes.
  // Return true iff allocation should be sampled
  bool SampleAllocation(size_t k);

  // Pick next sampling point
  void PickNextSample(size_t k);

  static void InitModule();
  static void InitTSD();
  static TCMalloc_ThreadCache* GetThreadHeap();
  static TCMalloc_ThreadCache* GetCache();
  static TCMalloc_ThreadCache* GetCacheIfPresent();
  static TCMalloc_ThreadCache* CreateCacheIfNecessary();
  static void DeleteCache(TCMalloc_ThreadCache* heap);
  static void BecomeIdle();
  static void RecomputeThreadCacheSize();

  // Invokes finder.visit() on every free object in every size class via 'reader'
  // (used by out-of-process heap enumeration).
  template <class Finder, class Reader>
  void enumerateFreeObjects(Finder& finder, const Reader& reader)
  {
    for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++)
      list_[sizeClass].enumerateFreeObjects(finder, reader);
  }
};

//-------------------------------------------------------------------
// Global variables
//-------------------------------------------------------------------

// Central cache -- a collection of free-lists, one per size-class.
// We have a separate lock per free-list to reduce contention.
static TCMalloc_Central_FreeListPadded central_cache[kNumClasses];

// Page-level allocator
// Raw, suitably-aligned storage for the singleton TCMalloc_PageHeap; it is
// constructed in place so malloc itself never depends on operator new.
static AllocAlignmentInteger pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(AllocAlignmentInteger) - 1) / sizeof(AllocAlignmentInteger)];
static bool phinited = false;

// Avoid extra level of indirection by making "pageheap" be just an alias
// of pageheap_memory.
// Union used to reinterpret the static buffer as a page heap without running
// afoul of type-punning casts.
typedef union {
    void* m_memory;
    TCMalloc_PageHeap* m_pageHeap;
} PageHeapUnion;

static inline TCMalloc_PageHeap* getPageHeap()
{
    PageHeapUnion u = { &pageheap_memory[0] };
    return u.m_pageHeap;
}

#define pageheap getPageHeap()

#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY

#if HAVE(DISPATCH_H) || OS(WIN)

// Timer callback: scavenges once, then either re-arms the timer (more work
// remains) or suspends it until signalScavenger() wakes it again.
void TCMalloc_PageHeap::periodicScavenge()
{
    SpinLockHolder h(&pageheap_lock);
    pageheap->scavenge();

    if (shouldScavenge()) {
        rescheduleScavenger();
        return;
    }

    suspendScavenger();
}

ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger()
{
    ASSERT(pageheap_lock.IsHeld());
    if (isScavengerSuspended() && shouldScavenge())
        scheduleScavenger();
}

#else

// Body of the pthread scavenger: sleeps on a condvar while there is nothing to
// do, otherwise scavenges once per kScavengeDelayInSeconds pause.
void TCMalloc_PageHeap::scavengerThread()
{
#if HAVE(PTHREAD_SETNAME_NP)
    pthread_setname_np("JavaScriptCore: FastMalloc scavenger");
#endif

    while (1) {
        pageheap_lock.Lock();
        if (!shouldScavenge()) {
            // Set to false so that signalScavenger() will check whether we need to be signalled.
            m_scavengeThreadActive = false;

            // We need to unlock now, as this thread will block on the condvar until scavenging is required.
            pageheap_lock.Unlock();

            // Block until there are enough free committed pages to release back to the system.
            pthread_mutex_lock(&m_scavengeMutex);
            pthread_cond_wait(&m_scavengeCondition, &m_scavengeMutex);
            // After exiting the pthread_cond_wait, we hold the lock on m_scavengeMutex. Unlock it to prevent
            // deadlock next time round the loop.
            pthread_mutex_unlock(&m_scavengeMutex);

            // Set to true to prevent unnecessary signalling of the condvar.
            m_scavengeThreadActive = true;
        } else
            pageheap_lock.Unlock();

        // Wait for a while to calculate how much memory remains unused during this pause.
// Continuation: per-iteration pause and scavenge at the end of scavengerThread().
        sleep(kScavengeDelayInSeconds);

        {
            SpinLockHolder h(&pageheap_lock);
            pageheap->scavenge();
        }
    }
}

#endif

#endif

// If TLS is available, we also store a copy
// of the per-thread object in a __thread variable
// since __thread variables are faster to read
// than pthread_getspecific().  We still need
// pthread_setspecific() because __thread
// variables provide no way to run cleanup
// code when a thread is destroyed.
#ifdef HAVE_TLS
static __thread TCMalloc_ThreadCache *threadlocal_heap;
#endif
// Thread-specific key.  Initialization here is somewhat tricky
// because some Linux startup code invokes malloc() before it
// is in a good enough state to handle pthread_keycreate().
// Therefore, we use TSD keys only after tsd_inited is set to true.
// Until then, we use a slow path to get the heap object.
static bool tsd_inited = false;
static pthread_key_t heap_key;
#if OS(WIN)
DWORD tlsIndex = TLS_OUT_OF_INDEXES;
#endif

// Stores 'heap' in every thread-local channel in use on this platform.
static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap)
{
    // Still do pthread_setspecific even if there's an alternate form
    // of thread-local storage in use, to benefit from the delete callback.
    pthread_setspecific(heap_key, heap);

#if OS(WIN)
    TlsSetValue(tlsIndex, heap);
#endif
}

// Allocator for thread heaps
static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator;

// Linked list of heap objects.  Protected by pageheap_lock.
static TCMalloc_ThreadCache* thread_heaps = NULL;
static int thread_heap_count = 0;

// Overall thread cache size.  Protected by pageheap_lock.
static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize;

// Global per-thread cache size.  Writes are protected by
// pageheap_lock.
// Reads are done without any locking, which should be
// fine as long as size_t can be written atomically and we don't place
// invariants between this variable and other pieces of state.
static volatile size_t per_thread_cache_size = kMaxThreadCacheSize;

//-------------------------------------------------------------------
// Central cache implementation
//-------------------------------------------------------------------

void TCMalloc_Central_FreeList::Init(size_t cl, uintptr_t entropy) {
  lock_.Init();
  size_class_ = cl;
  entropy_ = entropy;
#if ENABLE(TCMALLOC_HARDENING)
  ASSERT(entropy_);
#endif
  DLL_Init(&empty_, entropy_);
  DLL_Init(&nonempty_, entropy_);
  counter_ = 0;

  cache_size_ = 1;
  used_slots_ = 0;
  ASSERT(cache_size_ <= kNumTransferEntries);
}

// Returns each object on the given hardened list to its owning span.
void TCMalloc_Central_FreeList::ReleaseListToSpans(HardenedSLL start) {
  while (start) {
    HardenedSLL next = SLL_Next(start, entropy_);
    ReleaseToSpans(start);
    start = next;
  }
}

// Returns one object to its span; if that makes the span fully free, hands the
// whole span back to the page heap. REQUIRES lock_ held (it is dropped and
// re-taken around the pageheap call) — TODO confirm against callers.
ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(HardenedSLL object) {
  const PageID p = reinterpret_cast<uintptr_t>(object.value()) >> kPageShift;
  Span* span = pageheap->GetDescriptor(p);
  ASSERT(span != NULL);
  ASSERT(span->refcount > 0);

  // If span is empty, move it to non-empty list
  // (span->objects == NULL means it currently sits on the empty_ list; the
  // object we are about to add makes it non-empty).
  if (!span->objects) {
    DLL_Remove(span, entropy_);
    DLL_Prepend(&nonempty_, span, entropy_);
    Event(span, 'N', 0);
  }

  // The following check is expensive, so it is disabled by default
  if (false) {
    // Check that object does not occur in list
    // NOTE(review): this dead (if (false)) block looks wrong as written — the
    // loop condition "!p" exits immediately on a non-empty list, and the
    // SLL_Next() result is discarded, so the loop would not advance. Fix the
    // loop if this check is ever re-enabled.
    unsigned got = 0;
    for (HardenedSLL p = span->objects; !p; SLL_Next(p, entropy_)) {
      ASSERT(p.value() != object.value());
      got++;
    }
    ASSERT(got + span->refcount ==
           (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass));
  }

  counter_++;
// Continuation: refcount handling at the end of ReleaseToSpans().
  span->refcount--;
  if (span->refcount == 0) {
    // Span is now fully free; give it back to the page heap.
    Event(span, '#', 0);
    counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass);
    DLL_Remove(span, entropy_);

    // Release central list lock while operating on pageheap
    lock_.Unlock();
    {
      SpinLockHolder h(&pageheap_lock);
      pageheap->Delete(span);
    }
    lock_.Lock();
  } else {
    // Push the object onto the span's free list.
    SLL_SetNext(object, span->objects, entropy_);
    span->objects.setValue(object.value());
  }
}

// Picks a pseudo-random size class (round-robin via an unsynchronized counter)
// and asks it to shrink its transfer cache, skipping the class whose lock the
// caller already holds.
ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass(
    size_t locked_size_class, bool force) {
  static int race_counter = 0;
  int t = race_counter++;  // Updated without a lock, but who cares.
  if (t >= static_cast<int>(kNumClasses)) {
    while (t >= static_cast<int>(kNumClasses)) {
      t -= kNumClasses;
    }
    race_counter = t;
  }
  ASSERT(t >= 0);
  ASSERT(t < static_cast<int>(kNumClasses));
  if (t == static_cast<int>(locked_size_class)) return false;
  return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force);
}

// Ensures there is a free transfer-cache slot, stealing capacity from another
// size class if this one is at its current limit.
bool TCMalloc_Central_FreeList::MakeCacheSpace() {
  // Is there room in the cache?
  if (used_slots_ < cache_size_) return true;
  // Check if we can expand this cache?
  if (cache_size_ == kNumTransferEntries) return false;
  // Ok, we'll try to grab an entry from some other size class.
  if (EvictRandomSizeClass(size_class_, false) ||
      EvictRandomSizeClass(size_class_, true)) {
    // Succeeded in evicting, we're going to make our cache larger.
2652 cache_size_++; 2653 return true; 2654 } 2655 return false; 2656} 2657 2658 2659namespace { 2660class LockInverter { 2661 private: 2662 SpinLock *held_, *temp_; 2663 public: 2664 inline explicit LockInverter(SpinLock* held, SpinLock *temp) 2665 : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); } 2666 inline ~LockInverter() { temp_->Unlock(); held_->Lock(); } 2667}; 2668} 2669 2670bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) { 2671 // Start with a quick check without taking a lock. 2672 if (cache_size_ == 0) return false; 2673 // We don't evict from a full cache unless we are 'forcing'. 2674 if (force == false && used_slots_ == cache_size_) return false; 2675 2676 // Grab lock, but first release the other lock held by this thread. We use 2677 // the lock inverter to ensure that we never hold two size class locks 2678 // concurrently. That can create a deadlock because there is no well 2679 // defined nesting order. 2680 LockInverter li(¢ral_cache[locked_size_class].lock_, &lock_); 2681 ASSERT(used_slots_ <= cache_size_); 2682 ASSERT(0 <= cache_size_); 2683 if (cache_size_ == 0) return false; 2684 if (used_slots_ == cache_size_) { 2685 if (force == false) return false; 2686 // ReleaseListToSpans releases the lock, so we have to make all the 2687 // updates to the central list before calling it. 
2688 cache_size_--; 2689 used_slots_--; 2690 ReleaseListToSpans(tc_slots_[used_slots_].head); 2691 return true; 2692 } 2693 cache_size_--; 2694 return true; 2695} 2696 2697void TCMalloc_Central_FreeList::InsertRange(HardenedSLL start, HardenedSLL end, int N) { 2698 SpinLockHolder h(&lock_); 2699 if (N == num_objects_to_move[size_class_] && 2700 MakeCacheSpace()) { 2701 int slot = used_slots_++; 2702 ASSERT(slot >=0); 2703 ASSERT(slot < kNumTransferEntries); 2704 TCEntry *entry = &tc_slots_[slot]; 2705 entry->head = start; 2706 entry->tail = end; 2707 return; 2708 } 2709 ReleaseListToSpans(start); 2710} 2711 2712void TCMalloc_Central_FreeList::RemoveRange(HardenedSLL* start, HardenedSLL* end, int *N) { 2713 int num = *N; 2714 ASSERT(num > 0); 2715 2716 SpinLockHolder h(&lock_); 2717 if (num == num_objects_to_move[size_class_] && used_slots_ > 0) { 2718 int slot = --used_slots_; 2719 ASSERT(slot >= 0); 2720 TCEntry *entry = &tc_slots_[slot]; 2721 *start = entry->head; 2722 *end = entry->tail; 2723 return; 2724 } 2725 2726 // TODO: Prefetch multiple TCEntries? 2727 HardenedSLL tail = FetchFromSpansSafe(); 2728 if (!tail) { 2729 // We are completely out of memory. 
    *start = *end = HardenedSLL::null();
    *N = 0;
    return;
  }

  SLL_SetNext(tail, HardenedSLL::null(), entropy_);
  HardenedSLL head = tail;
  int count = 1;
  while (count < num) {
    HardenedSLL t = FetchFromSpans();
    if (!t) break;
    SLL_Push(&head, t, entropy_);
    count++;
  }
  *start = head;
  *end = tail;
  *N = count;
}


// Like FetchFromSpans(), but grows the central free list (Populate) once
// when the spans are empty before retrying.  May still return null on OOM.
HardenedSLL TCMalloc_Central_FreeList::FetchFromSpansSafe() {
  HardenedSLL t = FetchFromSpans();
  if (!t) {
    Populate();
    t = FetchFromSpans();
  }
  return t;
}

// Pop one object from the first non-empty span, moving the span to the
// empty list when its freelist drains.  Caller must hold lock_.
HardenedSLL TCMalloc_Central_FreeList::FetchFromSpans() {
  if (DLL_IsEmpty(&nonempty_, entropy_)) return HardenedSLL::null();
  Span* span = nonempty_.next(entropy_);

  ASSERT(span->objects);
  ASSERT_SPAN_COMMITTED(span);
  span->refcount++;
  HardenedSLL result = span->objects;
  span->objects = SLL_Next(result, entropy_);
  if (!span->objects) {
    // Move to empty list
    DLL_Remove(span, entropy_);
    DLL_Prepend(&empty_, span, entropy_);
    Event(span, 'E', 0);
  }
  counter_--;
  return result;
}

// Fetch memory from the system and add to the central cache freelist.
ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() {
  // Release central list lock while operating on pageheap
  lock_.Unlock();
  const size_t npages = class_to_pages[size_class_];

  Span* span;
  {
    SpinLockHolder h(&pageheap_lock);
    span = pageheap->New(npages);
    if (span) pageheap->RegisterSizeClass(span, size_class_);
  }
  if (span == NULL) {
#if OS(WIN)
    MESSAGE("allocation failed: %d\n", ::GetLastError());
#else
    MESSAGE("allocation failed: %d\n", errno);
#endif
    // Reacquire our lock before returning, as the caller expects it held.
    lock_.Lock();
    return;
  }
  ASSERT_SPAN_COMMITTED(span);
  ASSERT(span->length == npages);
  // Cache sizeclass info eagerly.  Locking is not necessary.
  // (Instead of being eager, we could just replace any stale info
  // about this span, but that seems to be no better in practice.)
  for (size_t i = 0; i < npages; i++) {
    pageheap->CacheSizeClass(span->start + i, size_class_);
  }

  // Split the block into pieces and add to the free-list
  // TODO: coloring of objects to avoid cache conflicts?
  HardenedSLL head = HardenedSLL::null();
  char* start = reinterpret_cast<char*>(span->start << kPageShift);
  const size_t size = ByteSizeForClass(size_class_);
  // Start past any tail too small to hold a whole object; build the
  // freelist back-to-front so it ends up in address order.
  char* ptr = start + (npages << kPageShift) - ((npages << kPageShift) % size);
  int num = 0;
#if ENABLE(TCMALLOC_HARDENING)
  uint32_t startPoison = freedObjectStartPoison();
  uint32_t endPoison = freedObjectEndPoison();
#endif

  while (ptr > start) {
    ptr -= size;
    HardenedSLL node = HardenedSLL::create(ptr);
    POISON_DEALLOCATION_EXPLICIT(ptr, size, startPoison, endPoison);
    SLL_SetNext(node, head, entropy_);
    head = node;
    num++;
  }
  ASSERT(ptr == start);
  ASSERT(ptr == head.value());
#ifndef NDEBUG
  {
    // Debug-only: verify every new freelist entry carries freed-object poison.
    HardenedSLL node = head;
    while (node) {
      ASSERT(IS_DEFINITELY_POISONED(node.value(), size));
      node = SLL_Next(node, entropy_);
    }
  }
#endif
  span->objects = head;
  ASSERT(span->objects.value() == head.value());
  span->refcount = 0; // No sub-object in use yet

  // Add span to list of non-empty spans
  lock_.Lock();
  DLL_Prepend(&nonempty_, span, entropy_);
  counter_ += num;
}

//-------------------------------------------------------------------
// TCMalloc_ThreadCache implementation
//-------------------------------------------------------------------

// Decide whether an allocation of k bytes should be sampled for heap
// profiling; otherwise just decrement the bytes-until-sample budget.
inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) {
  if (bytes_until_sample_ < k) {
    PickNextSample(k);
    return true;
  } else {
    bytes_until_sample_ -= k;
    return false;
  }
}

// One-time set-up of a freshly allocated per-thread cache owned by 'tid'.
void TCMalloc_ThreadCache::Init(ThreadIdentifier tid, uintptr_t entropy) {
  size_ = 0;
  next_ = NULL;
  prev_ = NULL;
  tid_ = tid;
  in_setspecific_ = false;
  entropy_ = entropy;
#if ENABLE(TCMALLOC_HARDENING)
  ASSERT(entropy_);
#endif
  for (size_t cl = 0; cl < kNumClasses; ++cl) {
    list_[cl].Init(entropy_);
  }

  // Initialize RNG -- run it for a bit to get to good values
  bytes_until_sample_ = 0;
  rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this));
  for (int i = 0; i < 100; i++) {
    PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2));
  }
}

// Flush every per-class freelist back to the central caches.
void TCMalloc_ThreadCache::Cleanup() {
  // Put unused memory back into central cache
  for (size_t cl = 0; cl < kNumClasses; ++cl) {
    if (list_[cl].length() > 0) {
      ReleaseToCentralCache(cl, list_[cl].length());
    }
  }
}

// Allocate one object of the size class covering 'size', refilling the
// local freelist from the central cache when empty.  Returns NULL on OOM.
ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) {
  ASSERT(size <= kMaxSize);
  const size_t cl = SizeClass(size);
  FreeList* list = &list_[cl];
  size_t allocationSize = ByteSizeForClass(cl);
  if (list->empty()) {
    FetchFromCentralCache(cl, allocationSize);
    if (list->empty()) return NULL;
  }
  size_ -= allocationSize;
  void* result = list->Pop();
  if (!result)
    return 0;
  // Hardening: the object must still carry freed-object poison, which is
  // then replaced with allocated-object poison before handing it out.
  RELEASE_ASSERT(IS_DEFINITELY_POISONED(result, allocationSize));
  POISON_ALLOCATION(result, allocationSize);
  return result;
}

// Return one object to the local freelist, overflowing a batch to the
// central cache when the list grows too long, and scavenging when the
// whole thread cache exceeds its budget.
inline void TCMalloc_ThreadCache::Deallocate(HardenedSLL ptr, size_t cl) {
  size_t allocationSize = ByteSizeForClass(cl);
  size_ += allocationSize;
  FreeList* list = &list_[cl];
  if (MAY_BE_POISONED(ptr.value(), allocationSize))
    list->Validate(ptr, allocationSize);

  POISON_DEALLOCATION(ptr.value(), allocationSize);
  list->Push(ptr);
  // If enough data is free, put back into central cache
  if (list->length() > kMaxFreeListLength) {
    ReleaseToCentralCache(cl, num_objects_to_move[cl]);
  }
  if (size_ >= per_thread_cache_size) Scavenge();
}

// Remove some objects of class "cl" from central cache and add to thread heap
ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) {
  int fetch_count = num_objects_to_move[cl];
  HardenedSLL start, end;
  central_cache[cl].RemoveRange(&start, &end, &fetch_count);
  list_[cl].PushRange(fetch_count, start, end);
  size_ += allocationSize * fetch_count;
}

// Remove some objects of class "cl" from thread heap and add to central cache
inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) {
  ASSERT(N > 0);
  FreeList* src = &list_[cl];
  if (N > src->length()) N = src->length();
  size_ -= N*ByteSizeForClass(cl);

  // We return prepackaged chains of the correct size to the central cache.
  // TODO: Use the same format internally in the thread caches?
  int batch_size = num_objects_to_move[cl];
  while (N > batch_size) {
    HardenedSLL tail, head;
    src->PopRange(batch_size, &head, &tail);
    central_cache[cl].InsertRange(head, tail, batch_size);
    N -= batch_size;
  }
  HardenedSLL tail, head;
  src->PopRange(N, &head, &tail);
  central_cache[cl].InsertRange(head, tail, N);
}

// Release idle memory to the central cache
inline void TCMalloc_ThreadCache::Scavenge() {
  // If the low-water mark for the free list is L, it means we would
  // not have had to allocate anything from the central cache even if
  // we had reduced the free list size by L.  We aim to get closer to
  // that situation by dropping L/2 nodes from the free list.  This
  // may not release much memory, but if so we will call scavenge again
  // pretty soon and the low-water marks will be high on that call.
  //int64 start = CycleClock::Now();

  for (size_t cl = 0; cl < kNumClasses; cl++) {
    FreeList* list = &list_[cl];
    const int lowmark = list->lowwatermark();
    if (lowmark > 0) {
      const int drop = (lowmark > 1) ? lowmark/2 : 1;
      ReleaseToCentralCache(cl, drop);
    }
    list->clear_lowwatermark();
  }

  //int64 finish = CycleClock::Now();
  //CycleTimer ct;
  //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0);
}

// Advance bytes_until_sample_ past the k bytes just sampled, using an LFSR
// to randomize the next sampling point.
void TCMalloc_ThreadCache::PickNextSample(size_t k) {
  // Make next "random" number
  // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers
  static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
  uint32_t r = rnd_;
  rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly);

  // Next point is "rnd_ % (sample_period)".  I.e., average
  // increment is "sample_period/2".
  const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter);
  static int last_flag_value = -1;

  if (flag_value != last_flag_value) {
    // Flag changed: pick the smallest prime >= flag_value as the period
    // (falling back to the largest entry when none is big enough).
    SpinLockHolder h(&sample_period_lock);
    int i;
    for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) {
      if (primes_list[i] >= flag_value) {
        break;
      }
    }
    sample_period = primes_list[i];
    last_flag_value = flag_value;
  }

  bytes_until_sample_ += rnd_ % sample_period;

  if (k > (static_cast<size_t>(-1) >> 2)) {
    // If the user has asked for a huge allocation then it is possible
    // for the code below to loop infinitely.  Just return (note that
    // this throws off the sampling accuracy somewhat, but a user who
    // is allocating more than 1G of memory at a time can live with a
    // minor inaccuracy in profiling of small allocations, and also
    // would rather not wait for the loop below to terminate).
    return;
  }

  while (bytes_until_sample_ < k) {
    // Increase bytes_until_sample_ by enough average sampling periods
    // (sample_period >> 1) to allow us to sample past the current
    // allocation.
    bytes_until_sample_ += (sample_period >> 1);
  }

  bytes_until_sample_ -= k;
}

// Global one-time initialization of size classes, allocators and the page
// heap, guarded by pageheap_lock and the phinited flag.
void TCMalloc_ThreadCache::InitModule() {
  // There is a slight potential race here because of double-checked
  // locking idiom.  However, as long as the program does a small
  // allocation before switching to multi-threaded mode, we will be
  // fine.  We increase the chances of doing such a small allocation
  // by doing one in the constructor of the module_enter_exit_hook
  // object declared below.
  SpinLockHolder h(&pageheap_lock);
  if (!phinited) {
    uintptr_t entropy = HARDENING_ENTROPY;
    InitTSD();
    InitSizeClasses();
    threadheap_allocator.Init(entropy);
    span_allocator.Init(entropy);
    span_allocator.New(); // Reduce cache conflicts
    span_allocator.New(); // Reduce cache conflicts
    stacktrace_allocator.Init(entropy);
    DLL_Init(&sampled_objects, entropy);
    for (size_t i = 0; i < kNumClasses; ++i) {
      central_cache[i].Init(i, entropy);
    }
    pageheap->init();
    phinited = 1;
#if OS(MACOSX)
    FastMallocZone::init();
#endif
  }
}

// Allocate, initialize and link a new per-thread heap into the global list.
// Caller must hold pageheap_lock.
inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid, uintptr_t entropy) {
  // Create the heap and add it to the linked list
  TCMalloc_ThreadCache *heap = threadheap_allocator.New();
  heap->Init(tid, entropy);
  heap->next_ = thread_heaps;
  heap->prev_ = NULL;
  if (thread_heaps != NULL) thread_heaps->prev_ = heap;
  thread_heaps = heap;
  thread_heap_count++;
  RecomputeThreadCacheSize();
  return heap;
}

// Fast lookup of the calling thread's cache; may return NULL.
inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() {
#ifdef HAVE_TLS
  // __thread is faster, but only when the kernel supports it
  if (KernelSupportsTLS())
    return threadlocal_heap;
#elif OS(WIN)
  return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex));
#else
  return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key));
#endif
}

// Get the calling thread's cache, initializing the module and/or creating
// the cache on first use.
inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
  TCMalloc_ThreadCache* ptr = NULL;
  if (!tsd_inited) {
    InitModule();
  } else {
    ptr = GetThreadHeap();
  }
  if (ptr == NULL) ptr = CreateCacheIfNecessary();
  return ptr;
}

// In deletion paths, we do not try to create a thread-cache.  This is
// because we may be in the thread destruction code and may have
// already cleaned up the cache for this thread.
inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() {
  if (!tsd_inited) return NULL;
  void* const p = GetThreadHeap();
  return reinterpret_cast<TCMalloc_ThreadCache*>(p);
}

// Create the pthread key (and Windows TLS index) used to locate per-thread
// caches, then repair heaps that were created before TSD existed.
void TCMalloc_ThreadCache::InitTSD() {
  ASSERT(!tsd_inited);
  pthread_key_create(&heap_key, DestroyThreadCache);
#if OS(WIN)
  tlsIndex = TlsAlloc();
#endif
  tsd_inited = true;

#if !OS(WIN)
  // We may have used a fake pthread_t for the main thread.  Fix it.
3113 pthread_t zero; 3114 memset(&zero, 0, sizeof(zero)); 3115#endif 3116 ASSERT(pageheap_lock.IsHeld()); 3117 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { 3118#if OS(WIN) 3119 if (h->tid_ == 0) { 3120 h->tid_ = GetCurrentThreadId(); 3121 } 3122#else 3123 if (pthread_equal(h->tid_, zero)) { 3124 h->tid_ = pthread_self(); 3125 } 3126#endif 3127 } 3128} 3129 3130TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() { 3131 // Initialize per-thread data if necessary 3132 TCMalloc_ThreadCache* heap = NULL; 3133 { 3134 SpinLockHolder h(&pageheap_lock); 3135 3136#if OS(WIN) 3137 DWORD me; 3138 if (!tsd_inited) { 3139 me = 0; 3140 } else { 3141 me = GetCurrentThreadId(); 3142 } 3143#else 3144 // Early on in glibc's life, we cannot even call pthread_self() 3145 pthread_t me; 3146 if (!tsd_inited) { 3147 memset(&me, 0, sizeof(me)); 3148 } else { 3149 me = pthread_self(); 3150 } 3151#endif 3152 3153 // This may be a recursive malloc call from pthread_setspecific() 3154 // In that case, the heap for this thread has already been created 3155 // and added to the linked list. So we search for that first. 3156 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { 3157#if OS(WIN) 3158 if (h->tid_ == me) { 3159#else 3160 if (pthread_equal(h->tid_, me)) { 3161#endif 3162 heap = h; 3163 break; 3164 } 3165 } 3166 3167 if (heap == NULL) heap = NewHeap(me, HARDENING_ENTROPY); 3168 } 3169 3170 // We call pthread_setspecific() outside the lock because it may 3171 // call malloc() recursively. The recursive call will never get 3172 // here again because it will find the already allocated heap in the 3173 // linked list of heaps. 
  // in_setspecific_ guards against re-entry while pthread_setspecific is
  // itself calling back into malloc.
  if (!heap->in_setspecific_ && tsd_inited) {
    heap->in_setspecific_ = true;
    setThreadHeap(heap);
  }
  return heap;
}

// Called when a thread goes idle: detach and delete its cache unless a
// recursive malloc from pthread_setspecific reinstates it.
void TCMalloc_ThreadCache::BecomeIdle() {
  if (!tsd_inited) return;              // No caches yet
  TCMalloc_ThreadCache* heap = GetThreadHeap();
  if (heap == NULL) return;             // No thread cache to remove
  if (heap->in_setspecific_) return;    // Do not disturb the active caller

  heap->in_setspecific_ = true;
  setThreadHeap(NULL);
#ifdef HAVE_TLS
  // Also update the copy in __thread
  threadlocal_heap = NULL;
#endif
  heap->in_setspecific_ = false;
  if (GetThreadHeap() == heap) {
    // Somehow heap got reinstated by a recursive call to malloc
    // from pthread_setspecific.  We give up in this case.
    return;
  }

  // We can now get rid of the heap
  DeleteCache(heap);
}

// pthread TSD destructor: tears down this thread's cache on thread exit.
void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) {
  // Note that "ptr" cannot be NULL since pthread promises not
  // to invoke the destructor on NULL values, but for safety,
  // we check anyway.
  if (ptr == NULL) return;
#ifdef HAVE_TLS
  // Prevent fast path of GetThreadHeap() from returning heap.
  threadlocal_heap = NULL;
#endif
  DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr));
}

// Flush a heap's freelists to the central caches, unlink it from the global
// list, and free it.
void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) {
  // Remove all memory from heap
  heap->Cleanup();

  // Remove from linked list
  SpinLockHolder h(&pageheap_lock);
  if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
  if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
  if (thread_heaps == heap) thread_heaps = heap->next_;
  thread_heap_count--;
  RecomputeThreadCacheSize();

  threadheap_allocator.Delete(heap);
}

// Recompute each thread cache's byte budget after a heap is added/removed.
void TCMalloc_ThreadCache::RecomputeThreadCacheSize() {
  // Divide available space across threads
  int n = thread_heap_count > 0 ? thread_heap_count : 1;
  size_t space = overall_thread_cache_size / n;

  // Limit to allowed range
  if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
  if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;

  per_thread_cache_size = space;
}

// Debug dump of per-class freelist lengths and low-water marks.
void TCMalloc_ThreadCache::Print() const {
  for (size_t cl = 0; cl < kNumClasses; ++cl) {
    MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n",
            ByteSizeForClass(cl),
            list_[cl].length(),
            list_[cl].lowwatermark());
  }
}

// Extract interesting stats
struct TCMallocStats {
  uint64_t system_bytes;        // Bytes alloced from system
  uint64_t thread_bytes;        // Bytes in thread caches
  uint64_t central_bytes;       // Bytes in central cache
  uint64_t transfer_bytes;      // Bytes in central transfer cache
  uint64_t pageheap_bytes;      // Bytes in page heap
  uint64_t metadata_bytes;      // Bytes alloced for metadata
};

// The constructor allocates an object to ensure that initialization
// runs before main(), and therefore we do not have a chance to become
// multi-threaded before initialization.  We also create the TSD key
// here.  Presumably by the time this constructor runs, glibc is in
// good enough shape to handle pthread_key_create().
//
// The constructor also takes the opportunity to tell STL to use
// tcmalloc.  We want to do this early, before construct time, so
// all user STL allocations go through tcmalloc (which works really
// well for STL).
//
// The destructor prints stats when the program exits.
class TCMallocGuard {
 public:

  TCMallocGuard() {
#ifdef HAVE_TLS    // this is true if the cc/ld/libc combo support TLS
    // Check whether the kernel also supports TLS (needs to happen at runtime)
    CheckIfKernelSupportsTLS();
#endif
    // Force a small allocation before and after TSD setup so the module
    // initializes while the process is still single-threaded.
    free(malloc(1));
    TCMalloc_ThreadCache::InitTSD();
    free(malloc(1));
  }
};

//-------------------------------------------------------------------
// Helpers for the exported routines below
//-------------------------------------------------------------------

#if !ASSERT_DISABLED
// Sanity-check that any cached sizeclass for ptr's page agrees with the
// authoritative value in the page descriptor (0 means "not cached").
static inline bool CheckCachedSizeClass(void *ptr) {
  PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  size_t cached_value = pageheap->GetSizeClassIfCached(p);
  return cached_value == 0 ||
      cached_value == pageheap->GetDescriptor(p)->sizeclass;
}
#endif

// Debug pass-through that validates the sizeclass cache for a result.
static inline void* CheckedMallocResult(void *result)
{
  ASSERT(result == 0 || CheckCachedSizeClass(result));
  return result;
}

// Convert a freshly allocated span into the pointer handed to the caller,
// clearing any stale sizeclass cache entry for its first page.
static inline void* SpanToMallocResult(Span *span) {
  ASSERT_SPAN_COMMITTED(span);
  pageheap->CacheSizeClass(span->start, 0);
  void* result = reinterpret_cast<void*>(span->start << kPageShift);
  POISON_ALLOCATION(result, span->length << kPageShift);
  return CheckedMallocResult(result);
}

// Core allocation path: thread cache for small sizes, page heap for large.
// Crashes (RELEASE_ASSERT) on out-of-memory.
static ALWAYS_INLINE void* do_malloc(size_t size) {
  void* ret = 0;

  ASSERT(!isForbidden());

  // The following call forces module initialization
  TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
  if (size > kMaxSize) {
    // Use page-level allocator
    SpinLockHolder h(&pageheap_lock);
    Span* span = pageheap->New(pages(size));
    if (span)
      ret = SpanToMallocResult(span);
  } else {
    // The common case, and also the simplest.  This just pops the
    // size-appropriate freelist, after replenishing it if it's empty.
    ret = CheckedMallocResult(heap->Allocate(size));
  }
  // This is the out-of-memory crash line.
  RELEASE_ASSERT(ret);
  return ret;
}

// Core deallocation path: small objects go back to the thread cache (or
// straight to the central cache when no thread cache exists); large spans
// go back to the page heap.
static ALWAYS_INLINE void do_free(void* ptr) {
  if (ptr == NULL) return;
  ASSERT(pageheap != NULL);  // Should not call free() before malloc()
  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  Span* span = NULL;
  size_t cl = pageheap->GetSizeClassIfCached(p);

  if (cl == 0) {
    // Cache miss (or large object): consult the page descriptor.  An
    // invalid span here means 'ptr' was not allocated by this allocator.
    span = pageheap->GetDescriptor(p);
    RELEASE_ASSERT(span->isValid());
    cl = span->sizeclass;
    pageheap->CacheSizeClass(p, cl);
  }
  if (cl != 0) {
#ifndef NO_TCMALLOC_SAMPLES
    ASSERT(!pageheap->GetDescriptor(p)->sample);
#endif
    TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent();
    if (heap != NULL) {
      heap->Deallocate(HardenedSLL::create(ptr), cl);
    } else {
      // Delete directly into central cache
      POISON_DEALLOCATION(ptr, ByteSizeForClass(cl));
      SLL_SetNext(HardenedSLL::create(ptr), HardenedSLL::null(), central_cache[cl].entropy());
      central_cache[cl].InsertRange(HardenedSLL::create(ptr), HardenedSLL::create(ptr), 1);
    }
  } else {
    SpinLockHolder h(&pageheap_lock);
    ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
    ASSERT(span != NULL && span->start == p);
#ifndef NO_TCMALLOC_SAMPLES
    if (span->sample) {
      DLL_Remove(span);
      stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects));
      span->objects = NULL;
    }
#endif

    POISON_DEALLOCATION(ptr, span->length << kPageShift);
    pageheap->Delete(span);
  }
}

// Helpers for use by exported routines below:

#ifdef HAVE_STRUCT_MALLINFO  // mallinfo isn't defined on freebsd, for instance
static inline struct mallinfo do_mallinfo() {
  TCMallocStats stats;
  ExtractStats(&stats, NULL);

  // Just some of the fields are filled in.
3389 struct mallinfo info; 3390 memset(&info, 0, sizeof(info)); 3391 3392 // Unfortunately, the struct contains "int" field, so some of the 3393 // size values will be truncated. 3394 info.arena = static_cast<int>(stats.system_bytes); 3395 info.fsmblks = static_cast<int>(stats.thread_bytes 3396 + stats.central_bytes 3397 + stats.transfer_bytes); 3398 info.fordblks = static_cast<int>(stats.pageheap_bytes); 3399 info.uordblks = static_cast<int>(stats.system_bytes 3400 - stats.thread_bytes 3401 - stats.central_bytes 3402 - stats.transfer_bytes 3403 - stats.pageheap_bytes); 3404 3405 return info; 3406} 3407#endif 3408 3409//------------------------------------------------------------------- 3410// Exported routines 3411//------------------------------------------------------------------- 3412 3413// CAVEAT: The code structure below ensures that MallocHook methods are always 3414// called from the stack frame of the invoked allocation function. 3415// heap-checker.cc depends on this to start a stack trace from 3416// the call to the (de)allocation function. 
3417 3418void* fastMalloc(size_t size) 3419{ 3420 return do_malloc(size); 3421} 3422 3423void fastFree(void* ptr) 3424{ 3425 do_free(ptr); 3426} 3427 3428void* fastCalloc(size_t n, size_t elem_size) 3429{ 3430 size_t totalBytes = n * elem_size; 3431 3432 // Protect against overflow 3433 if (n > 1 && elem_size && (totalBytes / elem_size) != n) 3434 return 0; 3435 3436 void* result = do_malloc(totalBytes); 3437 memset(result, 0, totalBytes); 3438 3439 return result; 3440} 3441 3442void* fastRealloc(void* old_ptr, size_t new_size) 3443{ 3444 if (old_ptr == NULL) { 3445 return do_malloc(new_size); 3446 } 3447 if (new_size == 0) { 3448 free(old_ptr); 3449 return NULL; 3450 } 3451 3452 // Get the size of the old entry 3453 const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift; 3454 size_t cl = pageheap->GetSizeClassIfCached(p); 3455 Span *span = NULL; 3456 size_t old_size; 3457 if (cl == 0) { 3458 span = pageheap->GetDescriptor(p); 3459 cl = span->sizeclass; 3460 pageheap->CacheSizeClass(p, cl); 3461 } 3462 if (cl != 0) { 3463 old_size = ByteSizeForClass(cl); 3464 } else { 3465 ASSERT(span != NULL); 3466 old_size = span->length << kPageShift; 3467 } 3468 3469 // Reallocate if the new size is larger than the old size, 3470 // or if the new size is significantly smaller than the old size. 3471 if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) { 3472 // Need to reallocate 3473 void* new_ptr = do_malloc(new_size); 3474 memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); 3475 // We could use a variant of do_free() that leverages the fact 3476 // that we already know the sizeclass of old_ptr. The benefit 3477 // would be small, so don't bother. 3478 do_free(old_ptr); 3479 return new_ptr; 3480 } else { 3481 return old_ptr; 3482 } 3483} 3484 3485void releaseFastMallocFreeMemory() 3486{ 3487 // Flush free pages in the current thread cache back to the page heap. 
    if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent())
        threadCache->Cleanup();

    SpinLockHolder h(&pageheap_lock);
    pageheap->ReleaseFreePages();
}

// Snapshot reserved/committed VM and total free-list bytes (central caches,
// transfer caches, and all thread caches).
FastMallocStatistics fastMallocStatistics()
{
    FastMallocStatistics statistics;

    SpinLockHolder lockHolder(&pageheap_lock);
    statistics.reservedVMBytes = static_cast<size_t>(pageheap->SystemBytes());
    statistics.committedVMBytes = statistics.reservedVMBytes - pageheap->ReturnedBytes();

    statistics.freeListBytes = 0;
    for (unsigned cl = 0; cl < kNumClasses; ++cl) {
        const int length = central_cache[cl].length();
        const int tc_length = central_cache[cl].tc_length();

        statistics.freeListBytes += ByteSizeForClass(cl) * (length + tc_length);
    }
    for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_)
        statistics.freeListBytes += threadCache->Size();

    return statistics;
}

#if OS(MACOSX)

// Follow one entropy-obfuscated ("hardened") next pointer of a freelist in
// a remote process, returning the de-obfuscated local pointer (0 at end).
template <typename T>
T* RemoteMemoryReader::nextEntryInHardenedLinkedList(T** remoteAddress, uintptr_t entropy) const
{
    T** localAddress = (*this)(remoteAddress);
    if (!localAddress)
        return 0;
    T* hardenedNext = *localAddress;
    if (!hardenedNext || hardenedNext == (void*)entropy)
        return 0;
    return XOR_MASK_PTR_WITH_KEY(hardenedNext, remoteAddress, entropy);
}

// Collects the set of free-object addresses in a remote malloc zone so the
// usage recorder can tell allocated objects apart from free ones.
class FreeObjectFinder {
    const RemoteMemoryReader& m_reader;
    HashSet<void*> m_freeObjects;

public:
    FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { }

    void visit(void* ptr) { m_freeObjects.add(ptr); }
    bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); }
    bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); }
    size_t freeObjectCount() const { return m_freeObjects.size(); }

    // Walk the remote linked list of thread caches, visiting each one's
    // free objects.
    void findFreeObjects(TCMalloc_ThreadCache* threadCache)
    {
        for (; threadCache; threadCache = (threadCache->next_ ? m_reader(threadCache->next_) : 0))
            threadCache->enumerateFreeObjects(*this, m_reader);
    }

    void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList)
    {
        for (unsigned i = 0; i < numSizes; i++)
            centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i);
    }
};

// Page-map visitor that records free spans and the free small objects on
// each span's freelist.
class PageMapFreeObjectFinder {
    const RemoteMemoryReader& m_reader;
    FreeObjectFinder& m_freeObjectFinder;
    uintptr_t m_entropy;

public:
    PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder, uintptr_t entropy)
        : m_reader(reader)
        , m_freeObjectFinder(freeObjectFinder)
        , m_entropy(entropy)
    {
#if ENABLE(TCMALLOC_HARDENING)
        ASSERT(m_entropy);
#endif
    }

    // Returns the number of pages covered, so the page-map walk can skip
    // the remainder of the span.
    int visit(void* ptr) const
    {
        if (!ptr)
            return 1;

        Span* span = m_reader(reinterpret_cast<Span*>(ptr));
        if (!span)
            return 1;

        if (span->free) {
            void* ptr = reinterpret_cast<void*>(span->start << kPageShift);
            m_freeObjectFinder.visit(ptr);
        } else if (span->sizeclass) {
            // Walk the free list of the small-object span, keeping track of each object seen
            for (HardenedSLL nextObject = span->objects; nextObject; nextObject.setValue(m_reader.nextEntryInHardenedLinkedList(reinterpret_cast<void**>(nextObject.value()), m_entropy)))
                m_freeObjectFinder.visit(nextObject.value());
        }
        return span->length;
    }
};

// Page-map visitor that coalesces runs of adjacent spans and reports the
// allocated ranges inside them to the zone enumerator's recorder callback.
class PageMapMemoryUsageRecorder {
    task_t m_task;
    void* m_context;
    unsigned m_typeMask;
    vm_range_recorder_t* m_recorder;
    const RemoteMemoryReader& m_reader;
    const FreeObjectFinder& m_freeObjectFinder;

    HashSet<void*> m_seenPointers;
    Vector<Span*> m_coalescedSpans;

public:
    PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder)
        : m_task(task)
        , m_context(context)
        , m_typeMask(typeMask)
        , m_recorder(recorder)
        , m_reader(reader)
        , m_freeObjectFinder(freeObjectFinder)
    { }

    ~PageMapMemoryUsageRecorder()
    {
        ASSERT(!m_coalescedSpans.size());
    }

    // Flush the pending run of coalesced spans: report each allocated large
    // span / small object as in use, then reset the run.
    void recordPendingRegions()
    {
        if (!(m_typeMask & (MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE))) {
            m_coalescedSpans.clear();
            return;
        }

        Vector<vm_range_t, 1024> allocatedPointers;
        for (size_t i = 0; i < m_coalescedSpans.size(); ++i) {
            Span *theSpan = m_coalescedSpans[i];
            if (theSpan->free)
                continue;

            vm_address_t spanStartAddress = theSpan->start << kPageShift;
            vm_size_t spanSizeInBytes = theSpan->length * kPageSize;

            if (!theSpan->sizeclass) {
                // If it's an allocated large object span, mark it as in use
                if (!m_freeObjectFinder.isFreeObject(spanStartAddress))
                    allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes});
            } else {
                const size_t objectSize = ByteSizeForClass(theSpan->sizeclass);

                // Mark each allocated small object within the span as in use
                const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes;
                for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) {
                    if (!m_freeObjectFinder.isFreeObject(object))
                        allocatedPointers.append((vm_range_t){object, objectSize});
                }
            }
        }

        (*m_recorder)(m_task, m_context, m_typeMask & (MALLOC_PTR_IN_USE_RANGE_TYPE | MALLOC_PTR_REGION_RANGE_TYPE), allocatedPointers.data(), allocatedPointers.size());

        m_coalescedSpans.clear();
    }

    int visit(void* ptr)
    {
        if (!ptr)
            return 1;

        Span* span = m_reader(reinterpret_cast<Span*>(ptr));
        if (!span || !span->start)
            return 1;

        if (!m_seenPointers.add(ptr).isNewEntry)
            return span->length;

        if (!m_coalescedSpans.size()) {
            m_coalescedSpans.append(span);
            return span->length;
        }

        Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
        vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift;
        vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize;

        // If the new span is adjacent to the previous span, do nothing for now.
        vm_address_t spanStartAddress = span->start << kPageShift;
        if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) {
            m_coalescedSpans.append(span);
            return span->length;
        }

        // New span is not adjacent to previous span, so record the spans coalesced so far.
        recordPendingRegions();
        m_coalescedSpans.append(span);

        return span->length;
    }
};

// Collects the allocator's own bookkeeping ("admin") regions and reports
// them to the enumerator's recorder callback in batches.
class AdminRegionRecorder {
    task_t m_task;
    void* m_context;
    unsigned m_typeMask;
    vm_range_recorder_t* m_recorder;

    Vector<vm_range_t, 1024> m_pendingRegions;

public:
    AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder)
        : m_task(task)
        , m_context(context)
        , m_typeMask(typeMask)
        , m_recorder(recorder)
    { }

    void recordRegion(vm_address_t ptr, size_t size)
    {
        if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE)
            m_pendingRegions.append((vm_range_t){ ptr, size });
    }

    void visit(void *ptr, size_t size)
    {
        recordRegion(reinterpret_cast<vm_address_t>(ptr), size);
    }

    void recordPendingRegions()
    {
        if (m_pendingRegions.size()) {
            (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size());
m_pendingRegions.clear(); 3723 } 3724 } 3725 3726 ~AdminRegionRecorder() 3727 { 3728 ASSERT(!m_pendingRegions.size()); 3729 } 3730}; 3731 3732kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder) 3733{ 3734 RemoteMemoryReader memoryReader(task, reader); 3735 3736 InitSizeClasses(); 3737 3738 FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress)); 3739 TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap); 3740 TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps); 3741 TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer); 3742 3743 TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses); 3744 3745 FreeObjectFinder finder(memoryReader); 3746 finder.findFreeObjects(threadHeaps); 3747 finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches); 3748 3749 TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_; 3750 PageMapFreeObjectFinder pageMapFinder(memoryReader, finder, pageHeap->entropy_); 3751 pageMap->visitValues(pageMapFinder, memoryReader); 3752 3753 PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder); 3754 pageMap->visitValues(usageRecorder, memoryReader); 3755 usageRecorder.recordPendingRegions(); 3756 3757 AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder); 3758 pageMap->visitAllocations(adminRegionRecorder, memoryReader); 3759 3760 PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator); 3761 PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator); 3762 3763 spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); 3764 pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); 3765 
3766 adminRegionRecorder.recordPendingRegions(); 3767 3768 return 0; 3769} 3770 3771size_t FastMallocZone::size(malloc_zone_t*, const void*) 3772{ 3773 return 0; 3774} 3775 3776void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t) 3777{ 3778 return 0; 3779} 3780 3781void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t) 3782{ 3783 return 0; 3784} 3785 3786void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr) 3787{ 3788 // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer 3789 // is not in this zone. When this happens, the pointer being freed was not allocated by any 3790 // zone so we need to print a useful error for the application developer. 3791 malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr); 3792} 3793 3794void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t) 3795{ 3796 return 0; 3797} 3798 3799 3800#undef malloc 3801#undef free 3802#undef realloc 3803#undef calloc 3804 3805extern "C" { 3806malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print, 3807 &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics 3808 3809#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 1060 3810 , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher. 3811#endif 3812#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 1070 3813 , 0, 0, 0, 0 // These members will not be used unless the zone advertises itself as version seven or higher. 
3814#endif 3815 3816 }; 3817} 3818 3819FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator) 3820 : m_pageHeap(pageHeap) 3821 , m_threadHeaps(threadHeaps) 3822 , m_centralCaches(centralCaches) 3823 , m_spanAllocator(spanAllocator) 3824 , m_pageHeapAllocator(pageHeapAllocator) 3825{ 3826 memset(&m_zone, 0, sizeof(m_zone)); 3827 m_zone.version = 4; 3828 m_zone.zone_name = "JavaScriptCore FastMalloc"; 3829 m_zone.size = &FastMallocZone::size; 3830 m_zone.malloc = &FastMallocZone::zoneMalloc; 3831 m_zone.calloc = &FastMallocZone::zoneCalloc; 3832 m_zone.realloc = &FastMallocZone::zoneRealloc; 3833 m_zone.free = &FastMallocZone::zoneFree; 3834 m_zone.valloc = &FastMallocZone::zoneValloc; 3835 m_zone.destroy = &FastMallocZone::zoneDestroy; 3836 m_zone.introspect = &jscore_fastmalloc_introspection; 3837 malloc_zone_register(&m_zone); 3838} 3839 3840 3841void FastMallocZone::init() 3842{ 3843 static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator); 3844} 3845 3846#endif // OS(MACOSX) 3847 3848} // namespace WTF 3849 3850#endif // FORCE_SYSTEM_MALLOC 3851