// Copyright (c) 2005, 2007, Google Inc.
// All rights reserved.
// Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011 Apple Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>
//
// A malloc that uses a per-thread cache to satisfy small malloc requests.
// (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
//
// See doc/tcmalloc.html for a high-level
// description of how this malloc works.
//
// SYNCHRONIZATION
//  1. The thread-specific lists are accessed without acquiring any locks.
//     This is safe because each such list is only accessed by one thread.
//  2. We have a lock per central free-list, and hold it while manipulating
//     the central free list for a particular size.
//  3. The central page allocator is protected by "pageheap_lock".
//  4. The pagemap (which maps from page-number to descriptor)
//     can be read without holding any locks, and written while holding
//     the "pageheap_lock".
//  5. To improve performance, a subset of the information one can get
//     from the pagemap is cached in a data structure, pagemap_cache_,
//     that atomically reads and writes its entries. This cache can be
//     read and written without locking.
//
// This multi-threaded access to the pagemap is safe for fairly
// subtle reasons. We basically assume that when an object X is
// allocated by thread A and deallocated by thread B, there must
// have been appropriate synchronization in the handoff of object
// X from thread A to thread B. The same logic applies to pagemap_cache_.
//
// THE PAGEID-TO-SIZECLASS CACHE
// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache
// returns 0 for a particular PageID then that means "no information," not that
// the sizeclass is 0. The cache may have stale information for pages that do
// not hold the beginning of any free()'able object.
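// (For example, a page that once held small objects may keep its old size-class
// entry after being reused as an interior page of a large span; that stale entry
// is harmless because free() of the large object is keyed off the object's first
// page, never its interior pages.)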
// Staleness is eliminated in Populate() for pages with sizeclass > 0 objects,
// and in do_malloc() and do_memalign() for all other relevant pages.
//
// TODO: Bias reclamation to larger addresses
// TODO: implement mallinfo/mallopt
// TODO: Better testing
//
// 9/28/2003 (new page-level allocator replaces ptmalloc2):
// * malloc/free of small objects goes from ~300 ns to ~50 ns.
// * allocation of a reasonably complicated struct
//   goes from about 1100 ns to about 300 ns.

#include "config.h"
#include "FastMalloc.h"

#include "Assertions.h"
#include <limits>
#if ENABLE(JSC_MULTIPLE_THREADS)
#include <pthread.h>
#endif
#include <wtf/StdLibExtras.h>

#ifndef NO_TCMALLOC_SAMPLES
#ifdef WTF_CHANGES
#define NO_TCMALLOC_SAMPLES
#endif
#endif

#if !(defined(USE_SYSTEM_MALLOC) && USE_SYSTEM_MALLOC) && defined(NDEBUG)
#define FORCE_SYSTEM_MALLOC 0
#else
#define FORCE_SYSTEM_MALLOC 1
#endif

// Use a background thread to periodically scavenge memory to release back to the system
// https://bugs.webkit.org/show_bug.cgi?id=27900: don't turn this on for Tiger until we have figured out why it caused a crash.
#if defined(BUILDING_ON_TIGER)
#define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 0
#else
#define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1
#endif

#ifndef NDEBUG
namespace WTF {

#if ENABLE(JSC_MULTIPLE_THREADS)
static pthread_key_t isForbiddenKey;
static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT;
static void initializeIsForbiddenKey()
{
    pthread_key_create(&isForbiddenKey, 0);
}

#if !ASSERT_DISABLED
static bool isForbidden()
{
    pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
    return !!pthread_getspecific(isForbiddenKey);
}
#endif

void fastMallocForbid()
{
    pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
    pthread_setspecific(isForbiddenKey, &isForbiddenKey);
}

void fastMallocAllow()
{
    pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
    pthread_setspecific(isForbiddenKey, 0);
}

#else

static bool staticIsForbidden;
static bool isForbidden()
{
    return staticIsForbidden;
}

void fastMallocForbid()
{
    staticIsForbidden = true;
}

void fastMallocAllow()
{
    staticIsForbidden = false;
}
#endif // ENABLE(JSC_MULTIPLE_THREADS)

} // namespace WTF
#endif // NDEBUG

#include <string.h>

namespace WTF {

#if ENABLE(FAST_MALLOC_MATCH_VALIDATION)

namespace Internal {

void fastMallocMatchFailed(void*)
{
    CRASH();
}

} // namespace Internal

#endif

void* fastZeroedMalloc(size_t n)
{
    void* result = fastMalloc(n);
    memset(result, 0, n);
    return result;
}

char* fastStrDup(const char* src)
{
    size_t len = strlen(src) + 1;
    char* dup = static_cast<char*>(fastMalloc(len));
    memcpy(dup, src, len);
    return dup;
}

TryMallocReturnValue tryFastZeroedMalloc(size_t n)
{
    void* result;
    if (!tryFastMalloc(n).getValue(result))
        return 0;
    memset(result, 0, n);
    return result;
}

} // namespace WTF

#if FORCE_SYSTEM_MALLOC

#if PLATFORM(BREWMP)
#include "brew/SystemMallocBrew.h"
#endif

#if OS(DARWIN)
#include <malloc/malloc.h>
#elif OS(WINDOWS)
#include <malloc.h>
#endif

namespace WTF {

TryMallocReturnValue tryFastMalloc(size_t n)
{
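    // Caller-side usage sketch (illustrative only; the #if 0 keeps it out of the
    // build): a TryMallocReturnValue must be consumed through getValue() before
    // the pointer is used, exactly as tryFastZeroedMalloc() does above.
#if 0
    void* buffer;
    if (tryFastMalloc(64).getValue(buffer)) {
        memset(buffer, 0, 64);
        fastFree(buffer);
    }
    // else: allocation failed; this path never calls CRASH().
#endif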
ASSERT(!isForbidden()); 220 221#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 222 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur... 223 return 0; 224 225 void* result = malloc(n + sizeof(AllocAlignmentInteger)); 226 if (!result) 227 return 0; 228 229 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; 230 result = static_cast<AllocAlignmentInteger*>(result) + 1; 231 232 return result; 233#else 234 return malloc(n); 235#endif 236} 237 238void* fastMalloc(size_t n) 239{ 240 ASSERT(!isForbidden()); 241 242#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 243 TryMallocReturnValue returnValue = tryFastMalloc(n); 244 void* result; 245 if (!returnValue.getValue(result)) 246 CRASH(); 247#else 248 void* result = malloc(n); 249#endif 250 251 if (!result) { 252#if PLATFORM(BREWMP) 253 // The behavior of malloc(0) is implementation defined. 254 // To make sure that fastMalloc never returns 0, retry with fastMalloc(1). 255 if (!n) 256 return fastMalloc(1); 257#endif 258 CRASH(); 259 } 260 261 return result; 262} 263 264TryMallocReturnValue tryFastCalloc(size_t n_elements, size_t element_size) 265{ 266 ASSERT(!isForbidden()); 267 268#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 269 size_t totalBytes = n_elements * element_size; 270 if (n_elements > 1 && element_size && (totalBytes / element_size) != n_elements || (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes)) 271 return 0; 272 273 totalBytes += sizeof(AllocAlignmentInteger); 274 void* result = malloc(totalBytes); 275 if (!result) 276 return 0; 277 278 memset(result, 0, totalBytes); 279 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; 280 result = static_cast<AllocAlignmentInteger*>(result) + 1; 281 return result; 282#else 283 return calloc(n_elements, element_size); 284#endif 285} 286 287void* fastCalloc(size_t n_elements, size_t element_size) 288{ 289 ASSERT(!isForbidden()); 290 291#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 292 TryMallocReturnValue returnValue = tryFastCalloc(n_elements, element_size); 293 void* result; 294 if (!returnValue.getValue(result)) 295 CRASH(); 296#else 297 void* result = calloc(n_elements, element_size); 298#endif 299 300 if (!result) { 301#if PLATFORM(BREWMP) 302 // If either n_elements or element_size is 0, the behavior of calloc is implementation defined. 303 // To make sure that fastCalloc never returns 0, retry with fastCalloc(1, 1). 304 if (!n_elements || !element_size) 305 return fastCalloc(1, 1); 306#endif 307 CRASH(); 308 } 309 310 return result; 311} 312 313void fastFree(void* p) 314{ 315 ASSERT(!isForbidden()); 316 317#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 318 if (!p) 319 return; 320 321 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p); 322 if (*header != Internal::AllocTypeMalloc) 323 Internal::fastMallocMatchFailed(p); 324 free(header); 325#else 326 free(p); 327#endif 328} 329 330TryMallocReturnValue tryFastRealloc(void* p, size_t n) 331{ 332 ASSERT(!isForbidden()); 333 334#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 335 if (p) { 336 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur... 
337 return 0; 338 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p); 339 if (*header != Internal::AllocTypeMalloc) 340 Internal::fastMallocMatchFailed(p); 341 void* result = realloc(header, n + sizeof(AllocAlignmentInteger)); 342 if (!result) 343 return 0; 344 345 // This should not be needed because the value is already there: 346 // *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; 347 result = static_cast<AllocAlignmentInteger*>(result) + 1; 348 return result; 349 } else { 350 return fastMalloc(n); 351 } 352#else 353 return realloc(p, n); 354#endif 355} 356 357void* fastRealloc(void* p, size_t n) 358{ 359 ASSERT(!isForbidden()); 360 361#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 362 TryMallocReturnValue returnValue = tryFastRealloc(p, n); 363 void* result; 364 if (!returnValue.getValue(result)) 365 CRASH(); 366#else 367 void* result = realloc(p, n); 368#endif 369 370 if (!result) 371 CRASH(); 372 return result; 373} 374 375void releaseFastMallocFreeMemory() { } 376 377FastMallocStatistics fastMallocStatistics() 378{ 379 FastMallocStatistics statistics = { 0, 0, 0 }; 380 return statistics; 381} 382 383size_t fastMallocSize(const void* p) 384{ 385#if OS(DARWIN) 386 return malloc_size(p); 387#elif OS(WINDOWS) && !PLATFORM(BREWMP) 388 // Brew MP uses its own memory allocator, so _msize does not work on the Brew MP simulator. 389 return _msize(const_cast<void*>(p)); 390#else 391 return 1; 392#endif 393} 394 395} // namespace WTF 396 397#if OS(DARWIN) 398// This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled. 399// It will never be used in this case, so it's type and value are less interesting than its presence. 400extern "C" const int jscore_fastmalloc_introspection = 0; 401#endif 402 403#else // FORCE_SYSTEM_MALLOC 404 405#if HAVE(STDINT_H) 406#include <stdint.h> 407#elif HAVE(INTTYPES_H) 408#include <inttypes.h> 409#else 410#include <sys/types.h> 411#endif 412 413#include "AlwaysInline.h" 414#include "Assertions.h" 415#include "TCPackedCache.h" 416#include "TCPageMap.h" 417#include "TCSpinLock.h" 418#include "TCSystemAlloc.h" 419#include <algorithm> 420#include <limits> 421#include <pthread.h> 422#include <stdarg.h> 423#include <stddef.h> 424#include <stdio.h> 425#if HAVE(ERRNO_H) 426#include <errno.h> 427#endif 428#if OS(UNIX) 429#include <unistd.h> 430#endif 431#if OS(WINDOWS) 432#ifndef WIN32_LEAN_AND_MEAN 433#define WIN32_LEAN_AND_MEAN 434#endif 435#include <windows.h> 436#endif 437 438#ifdef WTF_CHANGES 439 440#if OS(DARWIN) 441#include "MallocZoneSupport.h" 442#include <wtf/HashSet.h> 443#include <wtf/Vector.h> 444#endif 445 446#if HAVE(HEADER_DETECTION_H) 447#include "HeaderDetection.h" 448#endif 449 450#if HAVE(DISPATCH_H) 451#include <dispatch/dispatch.h> 452#endif 453 454#if HAVE(PTHREAD_MACHDEP_H) 455#include <System/pthread_machdep.h> 456 457#if defined(__PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0) 458#define WTF_USE_PTHREAD_GETSPECIFIC_DIRECT 1 459#endif 460#endif 461 462#ifndef PRIuS 463#define PRIuS "zu" 464#endif 465 466// Calling pthread_getspecific through a global function pointer is faster than a normal 467// call to the function on Mac OS X, and it's used in performance-critical code. So we 468// use a function pointer. But that's not necessarily faster on other platforms, and we had 469// problems with this technique on Windows, so we'll do this only on Mac OS X. 
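// Illustrative sketch of that indirection (excluded from the build; the names
// here are hypothetical, and the real macros follow in the OS(DARWIN) block
// below): resolve the libc function once into a global function pointer and
// route hot-path calls through it.
#if 0
static void* (*cached_pthread_getspecific)(pthread_key_t) = pthread_getspecific;

static inline void* exampleGetSpecific(pthread_key_t key)
{
    // Calling through the cached pointer presumably skips the per-call symbol
    // stub that makes the direct call slower on Mac OS X, per the comment above.
    return cached_pthread_getspecific(key);
}
#endif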
470#if OS(DARWIN) 471#if !USE(PTHREAD_GETSPECIFIC_DIRECT) 472static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific; 473#define pthread_getspecific(key) pthread_getspecific_function_pointer(key) 474#else 475#define pthread_getspecific(key) _pthread_getspecific_direct(key) 476#define pthread_setspecific(key, val) _pthread_setspecific_direct(key, (val)) 477#endif 478#endif 479 480#define DEFINE_VARIABLE(type, name, value, meaning) \ 481 namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ 482 type FLAGS_##name(value); \ 483 char FLAGS_no##name; \ 484 } \ 485 using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name 486 487#define DEFINE_int64(name, value, meaning) \ 488 DEFINE_VARIABLE(int64_t, name, value, meaning) 489 490#define DEFINE_double(name, value, meaning) \ 491 DEFINE_VARIABLE(double, name, value, meaning) 492 493namespace WTF { 494 495#define malloc fastMalloc 496#define calloc fastCalloc 497#define free fastFree 498#define realloc fastRealloc 499 500#define MESSAGE LOG_ERROR 501#define CHECK_CONDITION ASSERT 502 503#if OS(DARWIN) 504struct Span; 505class TCMalloc_Central_FreeListPadded; 506class TCMalloc_PageHeap; 507class TCMalloc_ThreadCache; 508template <typename T> class PageHeapAllocator; 509 510class FastMallocZone { 511public: 512 static void init(); 513 514 static kern_return_t enumerate(task_t, void*, unsigned typeMmask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t); 515 static size_t goodSize(malloc_zone_t*, size_t size) { return size; } 516 static boolean_t check(malloc_zone_t*) { return true; } 517 static void print(malloc_zone_t*, boolean_t) { } 518 static void log(malloc_zone_t*, void*) { } 519 static void forceLock(malloc_zone_t*) { } 520 static void forceUnlock(malloc_zone_t*) { } 521 static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); } 522 523private: 524 FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*); 525 static size_t size(malloc_zone_t*, const void*); 526 static void* zoneMalloc(malloc_zone_t*, size_t); 527 static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size); 528 static void zoneFree(malloc_zone_t*, void*); 529 static void* zoneRealloc(malloc_zone_t*, void*, size_t); 530 static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; } 531 static void zoneDestroy(malloc_zone_t*) { } 532 533 malloc_zone_t m_zone; 534 TCMalloc_PageHeap* m_pageHeap; 535 TCMalloc_ThreadCache** m_threadHeaps; 536 TCMalloc_Central_FreeListPadded* m_centralCaches; 537 PageHeapAllocator<Span>* m_spanAllocator; 538 PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator; 539}; 540 541#endif 542 543#endif 544 545#ifndef WTF_CHANGES 546// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if 547// you're porting to a system where you really can't get a stacktrace. 548#ifdef NO_TCMALLOC_SAMPLES 549// We use #define so code compiles even if you #include stacktrace.h somehow. 550# define GetStackTrace(stack, depth, skip) (0) 551#else 552# include <google/stacktrace.h> 553#endif 554#endif 555 556// Even if we have support for thread-local storage in the compiler 557// and linker, the OS may not support it. We need to check that at 558// runtime. Right now, we have to keep a manual set of "bad" OSes. 
#if defined(HAVE_TLS)
  static bool kernel_supports_tls = false;      // be conservative
  static inline bool KernelSupportsTLS() {
    return kernel_supports_tls;
  }
# if !HAVE_DECL_UNAME   // if too old for uname, probably too old for TLS
  static void CheckIfKernelSupportsTLS() {
    kernel_supports_tls = false;
  }
# else
#   include <sys/utsname.h>    // DECL_UNAME checked for <sys/utsname.h> too
  static void CheckIfKernelSupportsTLS() {
    struct utsname buf;
    if (uname(&buf) != 0) {   // should be impossible
      MESSAGE("uname failed, assuming no TLS support (errno=%d)\n", errno);
      kernel_supports_tls = false;
    } else if (strcasecmp(buf.sysname, "linux") == 0) {
      // The linux case: the first kernel to support TLS was 2.6.0
      if (buf.release[0] < '2' && buf.release[1] == '.')    // 0.x or 1.x
        kernel_supports_tls = false;
      else if (buf.release[0] == '2' && buf.release[1] == '.' &&
               buf.release[2] >= '0' && buf.release[2] < '6' &&
               buf.release[3] == '.')                       // 2.0 - 2.5
        kernel_supports_tls = false;
      else
        kernel_supports_tls = true;
    } else {        // some other kernel, we'll be optimistic
      kernel_supports_tls = true;
    }
    // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
  }
# endif  // HAVE_DECL_UNAME
#endif   // HAVE_TLS

// __THROW is defined in glibc systems. It means, counter-intuitively,
// "This function will never throw an exception." It's an optional
// optimization tool, but we may need to use it to match glibc prototypes.
#ifndef __THROW    // I guess we're not on a glibc system
# define __THROW   // __THROW is just an optimization, so ok to make it ""
#endif

//-------------------------------------------------------------------
// Configuration
//-------------------------------------------------------------------

// Not all possible combinations of the following parameters make
// sense. In particular, if kMaxSize increases, you may have to
// increase kNumClasses as well.
static const size_t kPageShift  = 12;
static const size_t kPageSize   = 1 << kPageShift;
static const size_t kMaxSize    = 8u * kPageSize;
static const size_t kAlignShift = 3;
static const size_t kAlignment  = 1 << kAlignShift;
static const size_t kNumClasses = 68;

// Allocates a big block of memory for the pagemap once we reach more than
// 128MB
static const size_t kPageMapBigAllocationThreshold = 128 << 20;

// Minimum number of pages to fetch from system at a time. Must be
// significantly bigger than kPageSize to amortize system-call
// overhead, and also to reduce external fragmentation. Also, we
// should keep this value big because various incarnations of Linux
// have small limits on the number of mmap() regions per
// address-space.
static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);

// Number of objects to move between a per-thread list and a central
// list in one shot. We want this to be not too small so we can
// amortize the lock overhead for accessing the central list. Making
// it too big may temporarily cause unnecessary memory wastage in the
// per-thread free list until the scavenger cleans up the list.
static int num_objects_to_move[kNumClasses];

// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list. We
// want this big to avoid locking the central free-list too often.
It 636// should not hurt to make this list somewhat big because the 637// scavenging code will shrink it down when its contents are not in use. 638static const int kMaxFreeListLength = 256; 639 640// Lower and upper bounds on the per-thread cache sizes 641static const size_t kMinThreadCacheSize = kMaxSize * 2; 642static const size_t kMaxThreadCacheSize = 2 << 20; 643 644// Default bound on the total amount of thread caches 645static const size_t kDefaultOverallThreadCacheSize = 16 << 20; 646 647// For all span-lengths < kMaxPages we keep an exact-size list. 648// REQUIRED: kMaxPages >= kMinSystemAlloc; 649static const size_t kMaxPages = kMinSystemAlloc; 650 651/* The smallest prime > 2^n */ 652static int primes_list[] = { 653 // Small values might cause high rates of sampling 654 // and hence commented out. 655 // 2, 5, 11, 17, 37, 67, 131, 257, 656 // 521, 1031, 2053, 4099, 8209, 16411, 657 32771, 65537, 131101, 262147, 524309, 1048583, 658 2097169, 4194319, 8388617, 16777259, 33554467 }; 659 660// Twice the approximate gap between sampling actions. 661// I.e., we take one sample approximately once every 662// tcmalloc_sample_parameter/2 663// bytes of allocation, i.e., ~ once every 128KB. 664// Must be a prime number. 665#ifdef NO_TCMALLOC_SAMPLES 666DEFINE_int64(tcmalloc_sample_parameter, 0, 667 "Unused: code is compiled with NO_TCMALLOC_SAMPLES"); 668static size_t sample_period = 0; 669#else 670DEFINE_int64(tcmalloc_sample_parameter, 262147, 671 "Twice the approximate gap between sampling actions." 672 " Must be a prime number. Otherwise will be rounded up to a " 673 " larger prime number"); 674static size_t sample_period = 262147; 675#endif 676 677// Protects sample_period above 678static SpinLock sample_period_lock = SPINLOCK_INITIALIZER; 679 680// Parameters for controlling how fast memory is returned to the OS. 681 682DEFINE_double(tcmalloc_release_rate, 1, 683 "Rate at which we release unused memory to the system. " 684 "Zero means we never release memory back to the system. " 685 "Increase this flag to return memory faster; decrease it " 686 "to return memory slower. Reasonable rates are in the " 687 "range [0,10]"); 688 689//------------------------------------------------------------------- 690// Mapping from size to size_class and vice versa 691//------------------------------------------------------------------- 692 693// Sizes <= 1024 have an alignment >= 8. So for such sizes we have an 694// array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128. 695// So for these larger sizes we have an array indexed by ceil(size/128). 696// 697// We flatten both logical arrays into one physical array and use 698// arithmetic to compute an appropriate index. The constants used by 699// ClassIndex() were selected to make the flattening work. 700// 701// Examples: 702// Size Expression Index 703// ------------------------------------------------------- 704// 0 (0 + 7) / 8 0 705// 1 (1 + 7) / 8 1 706// ... 707// 1024 (1024 + 7) / 8 128 708// 1025 (1025 + 127 + (120<<7)) / 128 129 709// ... 
710// 32768 (32768 + 127 + (120<<7)) / 128 376 711static const size_t kMaxSmallSize = 1024; 712static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128 713static const int add_amount[2] = { 7, 127 + (120 << 7) }; 714static unsigned char class_array[377]; 715 716// Compute index of the class_array[] entry for a given size 717static inline int ClassIndex(size_t s) { 718 const int i = (s > kMaxSmallSize); 719 return static_cast<int>((s + add_amount[i]) >> shift_amount[i]); 720} 721 722// Mapping from size class to max size storable in that class 723static size_t class_to_size[kNumClasses]; 724 725// Mapping from size class to number of pages to allocate at a time 726static size_t class_to_pages[kNumClasses]; 727 728// TransferCache is used to cache transfers of num_objects_to_move[size_class] 729// back and forth between thread caches and the central cache for a given size 730// class. 731struct TCEntry { 732 void *head; // Head of chain of objects. 733 void *tail; // Tail of chain of objects. 734}; 735// A central cache freelist can have anywhere from 0 to kNumTransferEntries 736// slots to put link list chains into. To keep memory usage bounded the total 737// number of TCEntries across size classes is fixed. Currently each size 738// class is initially given one TCEntry which also means that the maximum any 739// one class can have is kNumClasses. 740static const int kNumTransferEntries = kNumClasses; 741 742// Note: the following only works for "n"s that fit in 32-bits, but 743// that is fine since we only use it for small sizes. 744static inline int LgFloor(size_t n) { 745 int log = 0; 746 for (int i = 4; i >= 0; --i) { 747 int shift = (1 << i); 748 size_t x = n >> shift; 749 if (x != 0) { 750 n = x; 751 log += shift; 752 } 753 } 754 ASSERT(n == 1); 755 return log; 756} 757 758// Some very basic linked list functions for dealing with using void * as 759// storage. 760 761static inline void *SLL_Next(void *t) { 762 return *(reinterpret_cast<void**>(t)); 763} 764 765static inline void SLL_SetNext(void *t, void *n) { 766 *(reinterpret_cast<void**>(t)) = n; 767} 768 769static inline void SLL_Push(void **list, void *element) { 770 SLL_SetNext(element, *list); 771 *list = element; 772} 773 774static inline void *SLL_Pop(void **list) { 775 void *result = *list; 776 *list = SLL_Next(*list); 777 return result; 778} 779 780 781// Remove N elements from a linked list to which head points. head will be 782// modified to point to the new head. start and end will point to the first 783// and last nodes of the range. Note that end will point to NULL after this 784// function is called. 785static inline void SLL_PopRange(void **head, int N, void **start, void **end) { 786 if (N == 0) { 787 *start = NULL; 788 *end = NULL; 789 return; 790 } 791 792 void *tmp = *head; 793 for (int i = 1; i < N; ++i) { 794 tmp = SLL_Next(tmp); 795 } 796 797 *start = *head; 798 *end = tmp; 799 *head = SLL_Next(tmp); 800 // Unlink range from list. 801 SLL_SetNext(tmp, NULL); 802} 803 804static inline void SLL_PushRange(void **head, void *start, void *end) { 805 if (!start) return; 806 SLL_SetNext(end, *head); 807 *head = start; 808} 809 810static inline size_t SLL_Size(void *head) { 811 int count = 0; 812 while (head) { 813 count++; 814 head = SLL_Next(head); 815 } 816 return count; 817} 818 819// Setup helper functions. 
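// Illustrative spot-check (excluded from the build) of the flattened ClassIndex()
// scheme documented above: sizes <= 1024 index by ceil(size/8), larger sizes by
// ceil(size/128) offset past the first 128 slots.
#if 0
static void ExampleClassIndexSpotCheck() {
  ASSERT(ClassIndex(0) == 0);
  ASSERT(ClassIndex(1) == 1);
  ASSERT(ClassIndex(1024) == 128);      // (1024 + 7) / 8
  ASSERT(ClassIndex(1025) == 129);      // (1025 + 127 + (120 << 7)) / 128
  ASSERT(ClassIndex(kMaxSize) == 376);  // kMaxSize is 32768 with 4K pages
}
#endif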
820 821static ALWAYS_INLINE size_t SizeClass(size_t size) { 822 return class_array[ClassIndex(size)]; 823} 824 825// Get the byte-size for a specified class 826static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) { 827 return class_to_size[cl]; 828} 829static int NumMoveSize(size_t size) { 830 if (size == 0) return 0; 831 // Use approx 64k transfers between thread and central caches. 832 int num = static_cast<int>(64.0 * 1024.0 / size); 833 if (num < 2) num = 2; 834 // Clamp well below kMaxFreeListLength to avoid ping pong between central 835 // and thread caches. 836 if (num > static_cast<int>(0.8 * kMaxFreeListLength)) 837 num = static_cast<int>(0.8 * kMaxFreeListLength); 838 839 // Also, avoid bringing in too many objects into small object free 840 // lists. There are lots of such lists, and if we allow each one to 841 // fetch too many at a time, we end up having to scavenge too often 842 // (especially when there are lots of threads and each thread gets a 843 // small allowance for its thread cache). 844 // 845 // TODO: Make thread cache free list sizes dynamic so that we do not 846 // have to equally divide a fixed resource amongst lots of threads. 847 if (num > 32) num = 32; 848 849 return num; 850} 851 852// Initialize the mapping arrays 853static void InitSizeClasses() { 854 // Do some sanity checking on add_amount[]/shift_amount[]/class_array[] 855 if (ClassIndex(0) < 0) { 856 MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0)); 857 CRASH(); 858 } 859 if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) { 860 MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize)); 861 CRASH(); 862 } 863 864 // Compute the size classes we want to use 865 size_t sc = 1; // Next size class to assign 866 unsigned char alignshift = kAlignShift; 867 int last_lg = -1; 868 for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) { 869 int lg = LgFloor(size); 870 if (lg > last_lg) { 871 // Increase alignment every so often. 872 // 873 // Since we double the alignment every time size doubles and 874 // size >= 128, this means that space wasted due to alignment is 875 // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256 876 // bytes, so the space wasted as a percentage starts falling for 877 // sizes > 2K. 878 if ((lg >= 7) && (alignshift < 8)) { 879 alignshift++; 880 } 881 last_lg = lg; 882 } 883 884 // Allocate enough pages so leftover is less than 1/8 of total. 885 // This bounds wasted space to at most 12.5%. 886 size_t psize = kPageSize; 887 while ((psize % size) > (psize >> 3)) { 888 psize += kPageSize; 889 } 890 const size_t my_pages = psize >> kPageShift; 891 892 if (sc > 1 && my_pages == class_to_pages[sc-1]) { 893 // See if we can merge this into the previous class without 894 // increasing the fragmentation of the previous class. 
895 const size_t my_objects = (my_pages << kPageShift) / size; 896 const size_t prev_objects = (class_to_pages[sc-1] << kPageShift) 897 / class_to_size[sc-1]; 898 if (my_objects == prev_objects) { 899 // Adjust last class to include this size 900 class_to_size[sc-1] = size; 901 continue; 902 } 903 } 904 905 // Add new class 906 class_to_pages[sc] = my_pages; 907 class_to_size[sc] = size; 908 sc++; 909 } 910 if (sc != kNumClasses) { 911 MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n", 912 sc, int(kNumClasses)); 913 CRASH(); 914 } 915 916 // Initialize the mapping arrays 917 int next_size = 0; 918 for (unsigned char c = 1; c < kNumClasses; c++) { 919 const size_t max_size_in_class = class_to_size[c]; 920 for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) { 921 class_array[ClassIndex(s)] = c; 922 } 923 next_size = static_cast<int>(max_size_in_class + kAlignment); 924 } 925 926 // Double-check sizes just to be safe 927 for (size_t size = 0; size <= kMaxSize; size++) { 928 const size_t sc = SizeClass(size); 929 if (sc == 0) { 930 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size); 931 CRASH(); 932 } 933 if (sc > 1 && size <= class_to_size[sc-1]) { 934 MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS 935 "\n", sc, size); 936 CRASH(); 937 } 938 if (sc >= kNumClasses) { 939 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size); 940 CRASH(); 941 } 942 const size_t s = class_to_size[sc]; 943 if (size > s) { 944 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc); 945 CRASH(); 946 } 947 if (s == 0) { 948 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc); 949 CRASH(); 950 } 951 } 952 953 // Initialize the num_objects_to_move array. 954 for (size_t cl = 1; cl < kNumClasses; ++cl) { 955 num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl)); 956 } 957 958#ifndef WTF_CHANGES 959 if (false) { 960 // Dump class sizes and maximum external wastage per size class 961 for (size_t cl = 1; cl < kNumClasses; ++cl) { 962 const int alloc_size = class_to_pages[cl] << kPageShift; 963 const int alloc_objs = alloc_size / class_to_size[cl]; 964 const int min_used = (class_to_size[cl-1] + 1) * alloc_objs; 965 const int max_waste = alloc_size - min_used; 966 MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n", 967 int(cl), 968 int(class_to_size[cl-1] + 1), 969 int(class_to_size[cl]), 970 int(class_to_pages[cl] << kPageShift), 971 max_waste * 100.0 / alloc_size 972 ); 973 } 974 } 975#endif 976} 977 978// ------------------------------------------------------------------------- 979// Simple allocator for objects of a specified type. External locking 980// is required before accessing one of these objects. 
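// (For instance, span_allocator and stacktrace_allocator below are such
// allocators: they are Init()'d once during start-up, and their New()/Delete()
// calls are meant to be made only while the appropriate heap lock is held.)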
981// ------------------------------------------------------------------------- 982 983// Metadata allocator -- keeps stats about how many bytes allocated 984static uint64_t metadata_system_bytes = 0; 985static void* MetaDataAlloc(size_t bytes) { 986 void* result = TCMalloc_SystemAlloc(bytes, 0); 987 if (result != NULL) { 988 metadata_system_bytes += bytes; 989 } 990 return result; 991} 992 993template <class T> 994class PageHeapAllocator { 995 private: 996 // How much to allocate from system at a time 997 static const size_t kAllocIncrement = 32 << 10; 998 999 // Aligned size of T 1000 static const size_t kAlignedSize 1001 = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment); 1002 1003 // Free area from which to carve new objects 1004 char* free_area_; 1005 size_t free_avail_; 1006 1007 // Linked list of all regions allocated by this allocator 1008 void* allocated_regions_; 1009 1010 // Free list of already carved objects 1011 void* free_list_; 1012 1013 // Number of allocated but unfreed objects 1014 int inuse_; 1015 1016 public: 1017 void Init() { 1018 ASSERT(kAlignedSize <= kAllocIncrement); 1019 inuse_ = 0; 1020 allocated_regions_ = 0; 1021 free_area_ = NULL; 1022 free_avail_ = 0; 1023 free_list_ = NULL; 1024 } 1025 1026 T* New() { 1027 // Consult free list 1028 void* result; 1029 if (free_list_ != NULL) { 1030 result = free_list_; 1031 free_list_ = *(reinterpret_cast<void**>(result)); 1032 } else { 1033 if (free_avail_ < kAlignedSize) { 1034 // Need more room 1035 char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); 1036 if (!new_allocation) 1037 CRASH(); 1038 1039 *reinterpret_cast_ptr<void**>(new_allocation) = allocated_regions_; 1040 allocated_regions_ = new_allocation; 1041 free_area_ = new_allocation + kAlignedSize; 1042 free_avail_ = kAllocIncrement - kAlignedSize; 1043 } 1044 result = free_area_; 1045 free_area_ += kAlignedSize; 1046 free_avail_ -= kAlignedSize; 1047 } 1048 inuse_++; 1049 return reinterpret_cast<T*>(result); 1050 } 1051 1052 void Delete(T* p) { 1053 *(reinterpret_cast<void**>(p)) = free_list_; 1054 free_list_ = p; 1055 inuse_--; 1056 } 1057 1058 int inuse() const { return inuse_; } 1059 1060#if defined(WTF_CHANGES) && OS(DARWIN) 1061 template <class Recorder> 1062 void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader) 1063 { 1064 for (void* adminAllocation = allocated_regions_; adminAllocation; adminAllocation = reader.nextEntryInLinkedList(reinterpret_cast<void**>(adminAllocation))) 1065 recorder.recordRegion(reinterpret_cast<vm_address_t>(adminAllocation), kAllocIncrement); 1066 } 1067#endif 1068}; 1069 1070// ------------------------------------------------------------------------- 1071// Span - a contiguous run of pages 1072// ------------------------------------------------------------------------- 1073 1074// Type that can hold a page number 1075typedef uintptr_t PageID; 1076 1077// Type that can hold the length of a run of pages 1078typedef uintptr_t Length; 1079 1080static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift; 1081 1082// Convert byte size into pages. This won't overflow, but may return 1083// an unreasonably large value if bytes is huge enough. 1084static inline Length pages(size_t bytes) { 1085 return (bytes >> kPageShift) + 1086 ((bytes & (kPageSize - 1)) > 0 ? 
      1 : 0);
}

// Convert a user size into the number of bytes that will actually be
// allocated
static size_t AllocationSize(size_t bytes) {
  if (bytes > kMaxSize) {
    // Large object: we allocate an integral number of pages
    ASSERT(bytes <= (kMaxValidPages << kPageShift));
    return pages(bytes) << kPageShift;
  } else {
    // Small object: find the size class to which it belongs
    return ByteSizeForClass(SizeClass(bytes));
  }
}

// Information kept for a span (a contiguous run of pages).
struct Span {
  PageID        start;          // Starting page number
  Length        length;         // Number of pages in span
  Span*         next;           // Used when in linked list
  Span*         prev;           // Used when in linked list
  void*         objects;        // Linked list of free objects
  unsigned int  free : 1;       // Is the span free
#ifndef NO_TCMALLOC_SAMPLES
  unsigned int  sample : 1;     // Sampled object?
#endif
  unsigned int  sizeclass : 8;  // Size-class for small objects (or 0)
  unsigned int  refcount : 11;  // Number of non-free objects
  bool decommitted : 1;

#undef SPAN_HISTORY
#ifdef SPAN_HISTORY
  // For debugging, we can keep a log of events per span
  int nexthistory;
  char history[64];
  int value[64];
#endif
};

#define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted)

#ifdef SPAN_HISTORY
void Event(Span* span, char op, int v = 0) {
  span->history[span->nexthistory] = op;
  span->value[span->nexthistory] = v;
  span->nexthistory++;
  if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0;
}
#else
#define Event(s,o,v) ((void) 0)
#endif

// Allocator/deallocator for spans
static PageHeapAllocator<Span> span_allocator;
static Span* NewSpan(PageID p, Length len) {
  Span* result = span_allocator.New();
  memset(result, 0, sizeof(*result));
  result->start = p;
  result->length = len;
#ifdef SPAN_HISTORY
  result->nexthistory = 0;
#endif
  return result;
}

static inline void DeleteSpan(Span* span) {
#ifndef NDEBUG
  // In debug mode, trash the contents of deleted Spans
  memset(span, 0x3f, sizeof(*span));
#endif
  span_allocator.Delete(span);
}

// -------------------------------------------------------------------------
// Doubly linked list of spans.
1162// ------------------------------------------------------------------------- 1163 1164static inline void DLL_Init(Span* list) { 1165 list->next = list; 1166 list->prev = list; 1167} 1168 1169static inline void DLL_Remove(Span* span) { 1170 span->prev->next = span->next; 1171 span->next->prev = span->prev; 1172 span->prev = NULL; 1173 span->next = NULL; 1174} 1175 1176static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list) { 1177 return list->next == list; 1178} 1179 1180static int DLL_Length(const Span* list) { 1181 int result = 0; 1182 for (Span* s = list->next; s != list; s = s->next) { 1183 result++; 1184 } 1185 return result; 1186} 1187 1188#if 0 /* Not needed at the moment -- causes compiler warnings if not used */ 1189static void DLL_Print(const char* label, const Span* list) { 1190 MESSAGE("%-10s %p:", label, list); 1191 for (const Span* s = list->next; s != list; s = s->next) { 1192 MESSAGE(" <%p,%u,%u>", s, s->start, s->length); 1193 } 1194 MESSAGE("\n"); 1195} 1196#endif 1197 1198static inline void DLL_Prepend(Span* list, Span* span) { 1199 ASSERT(span->next == NULL); 1200 ASSERT(span->prev == NULL); 1201 span->next = list->next; 1202 span->prev = list; 1203 list->next->prev = span; 1204 list->next = span; 1205} 1206 1207// ------------------------------------------------------------------------- 1208// Stack traces kept for sampled allocations 1209// The following state is protected by pageheap_lock_. 1210// ------------------------------------------------------------------------- 1211 1212// size/depth are made the same size as a pointer so that some generic 1213// code below can conveniently cast them back and forth to void*. 1214static const int kMaxStackDepth = 31; 1215struct StackTrace { 1216 uintptr_t size; // Size of object 1217 uintptr_t depth; // Number of PC values stored in array below 1218 void* stack[kMaxStackDepth]; 1219}; 1220static PageHeapAllocator<StackTrace> stacktrace_allocator; 1221static Span sampled_objects; 1222 1223// ------------------------------------------------------------------------- 1224// Map from page-id to per-page data 1225// ------------------------------------------------------------------------- 1226 1227// We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines. 1228// We also use a simple one-level cache for hot PageID-to-sizeclass mappings, 1229// because sometimes the sizeclass is all the information we need. 1230 1231// Selector class -- general selector uses 3-level map 1232template <int BITS> class MapSelector { 1233 public: 1234 typedef TCMalloc_PageMap3<BITS-kPageShift> Type; 1235 typedef PackedCache<BITS, uint64_t> CacheType; 1236}; 1237 1238#if defined(WTF_CHANGES) 1239#if CPU(X86_64) 1240// On all known X86-64 platforms, the upper 16 bits are always unused and therefore 1241// can be excluded from the PageMap key. 
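// (With kPageShift == 12 that leaves 64 - 16 - 12 = 36 significant page-number
// bits, i.e. the map still covers the full 48-bit, 256 TB virtual address space
// that current x86-64 implementations expose.)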
1242// See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details 1243 1244static const size_t kBitsUnusedOn64Bit = 16; 1245#else 1246static const size_t kBitsUnusedOn64Bit = 0; 1247#endif 1248 1249// A three-level map for 64-bit machines 1250template <> class MapSelector<64> { 1251 public: 1252 typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type; 1253 typedef PackedCache<64, uint64_t> CacheType; 1254}; 1255#endif 1256 1257// A two-level map for 32-bit machines 1258template <> class MapSelector<32> { 1259 public: 1260 typedef TCMalloc_PageMap2<32 - kPageShift> Type; 1261 typedef PackedCache<32 - kPageShift, uint16_t> CacheType; 1262}; 1263 1264// ------------------------------------------------------------------------- 1265// Page-level allocator 1266// * Eager coalescing 1267// 1268// Heap for page-level allocation. We allow allocating and freeing a 1269// contiguous runs of pages (called a "span"). 1270// ------------------------------------------------------------------------- 1271 1272#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1273// The page heap maintains a free list for spans that are no longer in use by 1274// the central cache or any thread caches. We use a background thread to 1275// periodically scan the free list and release a percentage of it back to the OS. 1276 1277// If free_committed_pages_ exceeds kMinimumFreeCommittedPageCount, the 1278// background thread: 1279// - wakes up 1280// - pauses for kScavengeDelayInSeconds 1281// - returns to the OS a percentage of the memory that remained unused during 1282// that pause (kScavengePercentage * min_free_committed_pages_since_last_scavenge_) 1283// The goal of this strategy is to reduce memory pressure in a timely fashion 1284// while avoiding thrashing the OS allocator. 1285 1286// Time delay before the page heap scavenger will consider returning pages to 1287// the OS. 1288static const int kScavengeDelayInSeconds = 2; 1289 1290// Approximate percentage of free committed pages to return to the OS in one 1291// scavenge. 1292static const float kScavengePercentage = .5f; 1293 1294// number of span lists to keep spans in when memory is returned. 1295static const int kMinSpanListsWithSpans = 32; 1296 1297// Number of free committed pages that we want to keep around. The minimum number of pages used when there 1298// is 1 span in each of the first kMinSpanListsWithSpans spanlists. Currently 528 pages. 1299static const size_t kMinimumFreeCommittedPageCount = kMinSpanListsWithSpans * ((1.0f+kMinSpanListsWithSpans) / 2.0f); 1300 1301#endif 1302 1303class TCMalloc_PageHeap { 1304 public: 1305 void init(); 1306 1307 // Allocate a run of "n" pages. Returns zero if out of memory. 1308 Span* New(Length n); 1309 1310 // Delete the span "[p, p+n-1]". 1311 // REQUIRES: span was returned by earlier call to New() and 1312 // has not yet been deleted. 1313 void Delete(Span* span); 1314 1315 // Mark an allocated span as being used for small objects of the 1316 // specified size-class. 1317 // REQUIRES: span was returned by an earlier call to New() 1318 // and has not yet been deleted. 1319 void RegisterSizeClass(Span* span, size_t sc); 1320 1321 // Split an allocated span into two spans: one of length "n" pages 1322 // followed by another span of length "span->length - n" pages. 1323 // Modifies "*span" to point to the first span of length "n" pages. 1324 // Returns a pointer to the second span. 
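  // For example, Split(span, 3) on a 10-page span starting at page p shrinks
  // *span to cover pages [p, p+2] and returns a new span covering [p+3, p+9].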
1325 // 1326 // REQUIRES: "0 < n < span->length" 1327 // REQUIRES: !span->free 1328 // REQUIRES: span->sizeclass == 0 1329 Span* Split(Span* span, Length n); 1330 1331 // Return the descriptor for the specified page. 1332 inline Span* GetDescriptor(PageID p) const { 1333 return reinterpret_cast<Span*>(pagemap_.get(p)); 1334 } 1335 1336#ifdef WTF_CHANGES 1337 inline Span* GetDescriptorEnsureSafe(PageID p) 1338 { 1339 pagemap_.Ensure(p, 1); 1340 return GetDescriptor(p); 1341 } 1342 1343 size_t ReturnedBytes() const; 1344#endif 1345 1346 // Dump state to stderr 1347#ifndef WTF_CHANGES 1348 void Dump(TCMalloc_Printer* out); 1349#endif 1350 1351 // Return number of bytes allocated from system 1352 inline uint64_t SystemBytes() const { return system_bytes_; } 1353 1354 // Return number of free bytes in heap 1355 uint64_t FreeBytes() const { 1356 return (static_cast<uint64_t>(free_pages_) << kPageShift); 1357 } 1358 1359 bool Check(); 1360 bool CheckList(Span* list, Length min_pages, Length max_pages); 1361 1362 // Release all pages on the free list for reuse by the OS: 1363 void ReleaseFreePages(); 1364 1365 // Return 0 if we have no information, or else the correct sizeclass for p. 1366 // Reads and writes to pagemap_cache_ do not require locking. 1367 // The entries are 64 bits on 64-bit hardware and 16 bits on 1368 // 32-bit hardware, and we don't mind raciness as long as each read of 1369 // an entry yields a valid entry, not a partially updated entry. 1370 size_t GetSizeClassIfCached(PageID p) const { 1371 return pagemap_cache_.GetOrDefault(p, 0); 1372 } 1373 void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); } 1374 1375 private: 1376 // Pick the appropriate map and cache types based on pointer size 1377 typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap; 1378 typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache; 1379 PageMap pagemap_; 1380 mutable PageMapCache pagemap_cache_; 1381 1382 // We segregate spans of a given size into two circular linked 1383 // lists: one for normal spans, and one for spans whose memory 1384 // has been returned to the system. 1385 struct SpanList { 1386 Span normal; 1387 Span returned; 1388 }; 1389 1390 // List of free spans of length >= kMaxPages 1391 SpanList large_; 1392 1393 // Array mapping from span length to a doubly linked list of free spans 1394 SpanList free_[kMaxPages]; 1395 1396 // Number of pages kept in free lists 1397 uintptr_t free_pages_; 1398 1399 // Bytes allocated from system 1400 uint64_t system_bytes_; 1401 1402#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1403 // Number of pages kept in free lists that are still committed. 1404 Length free_committed_pages_; 1405 1406 // Minimum number of free committed pages since last scavenge. (Can be 0 if 1407 // we've committed new pages since the last scavenge.) 1408 Length min_free_committed_pages_since_last_scavenge_; 1409#endif 1410 1411 bool GrowHeap(Length n); 1412 1413 // REQUIRES span->length >= n 1414 // Remove span from its free list, and move any leftover part of 1415 // span into appropriate free lists. Also update "span" to have 1416 // length exactly "n" and mark it as non-free so it can be returned 1417 // to the client. 1418 // 1419 // "released" is true iff "span" was found on a "returned" list. 
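  // If "released" is true, Carve() recommits the whole span up front (see the
  // implementation further down), so any leftover piece goes back onto a
  // "normal" list already committed.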
1420 void Carve(Span* span, Length n, bool released); 1421 1422 void RecordSpan(Span* span) { 1423 pagemap_.set(span->start, span); 1424 if (span->length > 1) { 1425 pagemap_.set(span->start + span->length - 1, span); 1426 } 1427 } 1428 1429 // Allocate a large span of length == n. If successful, returns a 1430 // span of exactly the specified length. Else, returns NULL. 1431 Span* AllocLarge(Length n); 1432 1433#if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1434 // Incrementally release some memory to the system. 1435 // IncrementalScavenge(n) is called whenever n pages are freed. 1436 void IncrementalScavenge(Length n); 1437#endif 1438 1439 // Number of pages to deallocate before doing more scavenging 1440 int64_t scavenge_counter_; 1441 1442 // Index of last free list we scavenged 1443 size_t scavenge_index_; 1444 1445#if defined(WTF_CHANGES) && OS(DARWIN) 1446 friend class FastMallocZone; 1447#endif 1448 1449#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1450 void initializeScavenger(); 1451 ALWAYS_INLINE void signalScavenger(); 1452 void scavenge(); 1453 ALWAYS_INLINE bool shouldScavenge() const; 1454 1455#if HAVE(DISPATCH_H) || OS(WINDOWS) 1456 void periodicScavenge(); 1457 ALWAYS_INLINE bool isScavengerSuspended(); 1458 ALWAYS_INLINE void scheduleScavenger(); 1459 ALWAYS_INLINE void rescheduleScavenger(); 1460 ALWAYS_INLINE void suspendScavenger(); 1461#endif 1462 1463#if HAVE(DISPATCH_H) 1464 dispatch_queue_t m_scavengeQueue; 1465 dispatch_source_t m_scavengeTimer; 1466 bool m_scavengingSuspended; 1467#elif OS(WINDOWS) 1468 static void CALLBACK scavengerTimerFired(void*, BOOLEAN); 1469 HANDLE m_scavengeQueueTimer; 1470#else 1471 static NO_RETURN_WITH_VALUE void* runScavengerThread(void*); 1472 NO_RETURN void scavengerThread(); 1473 1474 // Keeps track of whether the background thread is actively scavenging memory every kScavengeDelayInSeconds, or 1475 // it's blocked waiting for more pages to be deleted. 
1476 bool m_scavengeThreadActive; 1477 1478 pthread_mutex_t m_scavengeMutex; 1479 pthread_cond_t m_scavengeCondition; 1480#endif 1481 1482#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1483}; 1484 1485void TCMalloc_PageHeap::init() 1486{ 1487 pagemap_.init(MetaDataAlloc); 1488 pagemap_cache_ = PageMapCache(0); 1489 free_pages_ = 0; 1490 system_bytes_ = 0; 1491 1492#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1493 free_committed_pages_ = 0; 1494 min_free_committed_pages_since_last_scavenge_ = 0; 1495#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1496 1497 scavenge_counter_ = 0; 1498 // Start scavenging at kMaxPages list 1499 scavenge_index_ = kMaxPages-1; 1500 COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits); 1501 DLL_Init(&large_.normal); 1502 DLL_Init(&large_.returned); 1503 for (size_t i = 0; i < kMaxPages; i++) { 1504 DLL_Init(&free_[i].normal); 1505 DLL_Init(&free_[i].returned); 1506 } 1507 1508#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1509 initializeScavenger(); 1510#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1511} 1512 1513#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1514 1515#if HAVE(DISPATCH_H) 1516 1517void TCMalloc_PageHeap::initializeScavenger() 1518{ 1519 m_scavengeQueue = dispatch_queue_create("com.apple.JavaScriptCore.FastMallocSavenger", NULL); 1520 m_scavengeTimer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, m_scavengeQueue); 1521 dispatch_time_t startTime = dispatch_time(DISPATCH_TIME_NOW, kScavengeDelayInSeconds * NSEC_PER_SEC); 1522 dispatch_source_set_timer(m_scavengeTimer, startTime, kScavengeDelayInSeconds * NSEC_PER_SEC, 1000 * NSEC_PER_USEC); 1523 dispatch_source_set_event_handler(m_scavengeTimer, ^{ periodicScavenge(); }); 1524 m_scavengingSuspended = true; 1525} 1526 1527ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended() 1528{ 1529 ASSERT(IsHeld(pageheap_lock)); 1530 return m_scavengingSuspended; 1531} 1532 1533ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger() 1534{ 1535 ASSERT(IsHeld(pageheap_lock)); 1536 m_scavengingSuspended = false; 1537 dispatch_resume(m_scavengeTimer); 1538} 1539 1540ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger() 1541{ 1542 // Nothing to do here for libdispatch. 1543} 1544 1545ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger() 1546{ 1547 ASSERT(IsHeld(pageheap_lock)); 1548 m_scavengingSuspended = true; 1549 dispatch_suspend(m_scavengeTimer); 1550} 1551 1552#elif OS(WINDOWS) 1553 1554void TCMalloc_PageHeap::scavengerTimerFired(void* context, BOOLEAN) 1555{ 1556 static_cast<TCMalloc_PageHeap*>(context)->periodicScavenge(); 1557} 1558 1559void TCMalloc_PageHeap::initializeScavenger() 1560{ 1561 m_scavengeQueueTimer = 0; 1562} 1563 1564ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended() 1565{ 1566 ASSERT(IsHeld(pageheap_lock)); 1567 return !m_scavengeQueueTimer; 1568} 1569 1570ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger() 1571{ 1572 // We need to use WT_EXECUTEONLYONCE here and reschedule the timer, because 1573 // Windows will fire the timer event even when the function is already running. 1574 ASSERT(IsHeld(pageheap_lock)); 1575 CreateTimerQueueTimer(&m_scavengeQueueTimer, 0, scavengerTimerFired, this, kScavengeDelayInSeconds * 1000, 0, WT_EXECUTEONLYONCE); 1576} 1577 1578ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger() 1579{ 1580 // We must delete the timer and create it again, because it is not possible to retrigger a timer on Windows. 
1581 suspendScavenger(); 1582 scheduleScavenger(); 1583} 1584 1585ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger() 1586{ 1587 ASSERT(IsHeld(pageheap_lock)); 1588 HANDLE scavengeQueueTimer = m_scavengeQueueTimer; 1589 m_scavengeQueueTimer = 0; 1590 DeleteTimerQueueTimer(0, scavengeQueueTimer, 0); 1591} 1592 1593#else 1594 1595void TCMalloc_PageHeap::initializeScavenger() 1596{ 1597 // Create a non-recursive mutex. 1598#if !defined(PTHREAD_MUTEX_NORMAL) || PTHREAD_MUTEX_NORMAL == PTHREAD_MUTEX_DEFAULT 1599 pthread_mutex_init(&m_scavengeMutex, 0); 1600#else 1601 pthread_mutexattr_t attr; 1602 pthread_mutexattr_init(&attr); 1603 pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL); 1604 1605 pthread_mutex_init(&m_scavengeMutex, &attr); 1606 1607 pthread_mutexattr_destroy(&attr); 1608#endif 1609 1610 pthread_cond_init(&m_scavengeCondition, 0); 1611 m_scavengeThreadActive = true; 1612 pthread_t thread; 1613 pthread_create(&thread, 0, runScavengerThread, this); 1614} 1615 1616void* TCMalloc_PageHeap::runScavengerThread(void* context) 1617{ 1618 static_cast<TCMalloc_PageHeap*>(context)->scavengerThread(); 1619#if (COMPILER(MSVC) || COMPILER(SUNCC)) 1620 // Without this, Visual Studio and Sun Studio will complain that this method does not return a value. 1621 return 0; 1622#endif 1623} 1624 1625ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger() 1626{ 1627 // m_scavengeMutex should be held before accessing m_scavengeThreadActive. 1628 ASSERT(pthread_mutex_trylock(m_scavengeMutex)); 1629 if (!m_scavengeThreadActive && shouldScavenge()) 1630 pthread_cond_signal(&m_scavengeCondition); 1631} 1632 1633#endif 1634 1635void TCMalloc_PageHeap::scavenge() 1636{ 1637 size_t pagesToRelease = min_free_committed_pages_since_last_scavenge_ * kScavengePercentage; 1638 size_t targetPageCount = std::max<size_t>(kMinimumFreeCommittedPageCount, free_committed_pages_ - pagesToRelease); 1639 1640 while (free_committed_pages_ > targetPageCount) { 1641 for (int i = kMaxPages; i > 0 && free_committed_pages_ >= targetPageCount; i--) { 1642 SpanList* slist = (static_cast<size_t>(i) == kMaxPages) ? &large_ : &free_[i]; 1643 // If the span size is bigger than kMinSpanListsWithSpans pages return all the spans in the list, else return all but 1 span. 1644 // Return only 50% of a spanlist at a time so spans of size 1 are not the only ones left. 1645 size_t length = DLL_Length(&slist->normal); 1646 size_t numSpansToReturn = (i > kMinSpanListsWithSpans) ? 
length : length / 2; 1647 for (int j = 0; static_cast<size_t>(j) < numSpansToReturn && !DLL_IsEmpty(&slist->normal) && free_committed_pages_ > targetPageCount; j++) { 1648 Span* s = slist->normal.prev; 1649 DLL_Remove(s); 1650 ASSERT(!s->decommitted); 1651 if (!s->decommitted) { 1652 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), 1653 static_cast<size_t>(s->length << kPageShift)); 1654 ASSERT(free_committed_pages_ >= s->length); 1655 free_committed_pages_ -= s->length; 1656 s->decommitted = true; 1657 } 1658 DLL_Prepend(&slist->returned, s); 1659 } 1660 } 1661 } 1662 1663 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; 1664} 1665 1666ALWAYS_INLINE bool TCMalloc_PageHeap::shouldScavenge() const 1667{ 1668 return free_committed_pages_ > kMinimumFreeCommittedPageCount; 1669} 1670 1671#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1672 1673inline Span* TCMalloc_PageHeap::New(Length n) { 1674 ASSERT(Check()); 1675 ASSERT(n > 0); 1676 1677 // Find first size >= n that has a non-empty list 1678 for (Length s = n; s < kMaxPages; s++) { 1679 Span* ll = NULL; 1680 bool released = false; 1681 if (!DLL_IsEmpty(&free_[s].normal)) { 1682 // Found normal span 1683 ll = &free_[s].normal; 1684 } else if (!DLL_IsEmpty(&free_[s].returned)) { 1685 // Found returned span; reallocate it 1686 ll = &free_[s].returned; 1687 released = true; 1688 } else { 1689 // Keep looking in larger classes 1690 continue; 1691 } 1692 1693 Span* result = ll->next; 1694 Carve(result, n, released); 1695#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1696 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the 1697 // free committed pages count. 1698 ASSERT(free_committed_pages_ >= n); 1699 free_committed_pages_ -= n; 1700 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) 1701 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; 1702#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1703 ASSERT(Check()); 1704 free_pages_ -= n; 1705 return result; 1706 } 1707 1708 Span* result = AllocLarge(n); 1709 if (result != NULL) { 1710 ASSERT_SPAN_COMMITTED(result); 1711 return result; 1712 } 1713 1714 // Grow the heap and try again 1715 if (!GrowHeap(n)) { 1716 ASSERT(Check()); 1717 return NULL; 1718 } 1719 1720 return AllocLarge(n); 1721} 1722 1723Span* TCMalloc_PageHeap::AllocLarge(Length n) { 1724 // find the best span (closest to n in size). 1725 // The following loops implements address-ordered best-fit. 
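  // For example, among free spans of lengths {4, 3, 3} pages starting at pages
  // {900, 500, 700}, a request for 2 pages picks the 3-page span at page 500:
  // shortest adequate length first, lowest start address as the tie-breaker.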
1726 bool from_released = false; 1727 Span *best = NULL; 1728 1729 // Search through normal list 1730 for (Span* span = large_.normal.next; 1731 span != &large_.normal; 1732 span = span->next) { 1733 if (span->length >= n) { 1734 if ((best == NULL) 1735 || (span->length < best->length) 1736 || ((span->length == best->length) && (span->start < best->start))) { 1737 best = span; 1738 from_released = false; 1739 } 1740 } 1741 } 1742 1743 // Search through released list in case it has a better fit 1744 for (Span* span = large_.returned.next; 1745 span != &large_.returned; 1746 span = span->next) { 1747 if (span->length >= n) { 1748 if ((best == NULL) 1749 || (span->length < best->length) 1750 || ((span->length == best->length) && (span->start < best->start))) { 1751 best = span; 1752 from_released = true; 1753 } 1754 } 1755 } 1756 1757 if (best != NULL) { 1758 Carve(best, n, from_released); 1759#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1760 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the 1761 // free committed pages count. 1762 ASSERT(free_committed_pages_ >= n); 1763 free_committed_pages_ -= n; 1764 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) 1765 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; 1766#endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1767 ASSERT(Check()); 1768 free_pages_ -= n; 1769 return best; 1770 } 1771 return NULL; 1772} 1773 1774Span* TCMalloc_PageHeap::Split(Span* span, Length n) { 1775 ASSERT(0 < n); 1776 ASSERT(n < span->length); 1777 ASSERT(!span->free); 1778 ASSERT(span->sizeclass == 0); 1779 Event(span, 'T', n); 1780 1781 const Length extra = span->length - n; 1782 Span* leftover = NewSpan(span->start + n, extra); 1783 Event(leftover, 'U', extra); 1784 RecordSpan(leftover); 1785 pagemap_.set(span->start + n - 1, span); // Update map from pageid to span 1786 span->length = n; 1787 1788 return leftover; 1789} 1790 1791inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) { 1792 ASSERT(n > 0); 1793 DLL_Remove(span); 1794 span->free = 0; 1795 Event(span, 'A', n); 1796 1797 if (released) { 1798 // If the span chosen to carve from is decommited, commit the entire span at once to avoid committing spans 1 page at a time. 1799 ASSERT(span->decommitted); 1800 TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift), static_cast<size_t>(span->length << kPageShift)); 1801 span->decommitted = false; 1802#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1803 free_committed_pages_ += span->length; 1804#endif 1805 } 1806 1807 const int extra = static_cast<int>(span->length - n); 1808 ASSERT(extra >= 0); 1809 if (extra > 0) { 1810 Span* leftover = NewSpan(span->start + n, extra); 1811 leftover->free = 1; 1812 leftover->decommitted = false; 1813 Event(leftover, 'S', extra); 1814 RecordSpan(leftover); 1815 1816 // Place leftover span on appropriate free list 1817 SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? 
&free_[extra] : &large_; 1818 Span* dst = &listpair->normal; 1819 DLL_Prepend(dst, leftover); 1820 1821 span->length = n; 1822 pagemap_.set(span->start + n - 1, span); 1823 } 1824} 1825 1826static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other) 1827{ 1828 if (destination->decommitted && !other->decommitted) { 1829 TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift), 1830 static_cast<size_t>(other->length << kPageShift)); 1831 } else if (other->decommitted && !destination->decommitted) { 1832 TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift), 1833 static_cast<size_t>(destination->length << kPageShift)); 1834 destination->decommitted = true; 1835 } 1836} 1837 1838inline void TCMalloc_PageHeap::Delete(Span* span) { 1839 ASSERT(Check()); 1840 ASSERT(!span->free); 1841 ASSERT(span->length > 0); 1842 ASSERT(GetDescriptor(span->start) == span); 1843 ASSERT(GetDescriptor(span->start + span->length - 1) == span); 1844 span->sizeclass = 0; 1845#ifndef NO_TCMALLOC_SAMPLES 1846 span->sample = 0; 1847#endif 1848 1849 // Coalesce -- we guarantee that "p" != 0, so no bounds checking 1850 // necessary. We do not bother resetting the stale pagemap 1851 // entries for the pieces we are merging together because we only 1852 // care about the pagemap entries for the boundaries. 1853#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1854 // Track the total size of the neighboring free spans that are committed. 1855 Length neighboringCommittedSpansLength = 0; 1856#endif 1857 const PageID p = span->start; 1858 const Length n = span->length; 1859 Span* prev = GetDescriptor(p-1); 1860 if (prev != NULL && prev->free) { 1861 // Merge preceding span into this span 1862 ASSERT(prev->start + prev->length == p); 1863 const Length len = prev->length; 1864#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1865 if (!prev->decommitted) 1866 neighboringCommittedSpansLength += len; 1867#endif 1868 mergeDecommittedStates(span, prev); 1869 DLL_Remove(prev); 1870 DeleteSpan(prev); 1871 span->start -= len; 1872 span->length += len; 1873 pagemap_.set(span->start, span); 1874 Event(span, 'L', len); 1875 } 1876 Span* next = GetDescriptor(p+n); 1877 if (next != NULL && next->free) { 1878 // Merge next span into this span 1879 ASSERT(next->start == p+n); 1880 const Length len = next->length; 1881#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1882 if (!next->decommitted) 1883 neighboringCommittedSpansLength += len; 1884#endif 1885 mergeDecommittedStates(span, next); 1886 DLL_Remove(next); 1887 DeleteSpan(next); 1888 span->length += len; 1889 pagemap_.set(span->start + span->length - 1, span); 1890 Event(span, 'R', len); 1891 } 1892 1893 Event(span, 'D', span->length); 1894 span->free = 1; 1895 if (span->decommitted) { 1896 if (span->length < kMaxPages) 1897 DLL_Prepend(&free_[span->length].returned, span); 1898 else 1899 DLL_Prepend(&large_.returned, span); 1900 } else { 1901 if (span->length < kMaxPages) 1902 DLL_Prepend(&free_[span->length].normal, span); 1903 else 1904 DLL_Prepend(&large_.normal, span); 1905 } 1906 free_pages_ += n; 1907 1908#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1909 if (span->decommitted) { 1910 // If the merged span is decommitted, that means we decommitted any neighboring spans that were 1911 // committed. Update the free committed pages count. 
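// Worked example of the accounting below: suppose a committed 4-page span is freed
// and merges with a committed 2-page predecessor and a decommitted 3-page successor.
// mergeDecommittedStates() releases the committed pieces, so the merged 9-page span
// ends up decommitted; only the predecessor's 2 pages were previously counted in
// free_committed_pages_ (the 4 freed pages never were, since they were in use), so
// 2 is what gets subtracted here.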
1912 free_committed_pages_ -= neighboringCommittedSpansLength; 1913 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_) 1914 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_; 1915 } else { 1916 // If the merged span remains committed, add the deleted span's size to the free committed pages count. 1917 free_committed_pages_ += n; 1918 } 1919 1920 // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system. 1921 signalScavenger(); 1922#else 1923 IncrementalScavenge(n); 1924#endif 1925 1926 ASSERT(Check()); 1927} 1928 1929#if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1930void TCMalloc_PageHeap::IncrementalScavenge(Length n) { 1931 // Fast path; not yet time to release memory 1932 scavenge_counter_ -= n; 1933 if (scavenge_counter_ >= 0) return; // Not yet time to scavenge 1934 1935 // If there is nothing to release, wait for so many pages before 1936 // scavenging again. With 4K pages, this comes to 16MB of memory. 1937 static const size_t kDefaultReleaseDelay = 1 << 8; 1938 1939 // Find index of free list to scavenge 1940 size_t index = scavenge_index_ + 1; 1941 for (size_t i = 0; i < kMaxPages+1; i++) { 1942 if (index > kMaxPages) index = 0; 1943 SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index]; 1944 if (!DLL_IsEmpty(&slist->normal)) { 1945 // Release the last span on the normal portion of this list 1946 Span* s = slist->normal.prev; 1947 DLL_Remove(s); 1948 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), 1949 static_cast<size_t>(s->length << kPageShift)); 1950 s->decommitted = true; 1951 DLL_Prepend(&slist->returned, s); 1952 1953 scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay))); 1954 1955 if (index == kMaxPages && !DLL_IsEmpty(&slist->normal)) 1956 scavenge_index_ = index - 1; 1957 else 1958 scavenge_index_ = index; 1959 return; 1960 } 1961 index++; 1962 } 1963 1964 // Nothing to scavenge, delay for a while 1965 scavenge_counter_ = kDefaultReleaseDelay; 1966} 1967#endif 1968 1969void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) { 1970 // Associate span object with all interior pages as well 1971 ASSERT(!span->free); 1972 ASSERT(GetDescriptor(span->start) == span); 1973 ASSERT(GetDescriptor(span->start+span->length-1) == span); 1974 Event(span, 'C', sc); 1975 span->sizeclass = static_cast<unsigned int>(sc); 1976 for (Length i = 1; i < span->length-1; i++) { 1977 pagemap_.set(span->start+i, span); 1978 } 1979} 1980 1981#ifdef WTF_CHANGES 1982size_t TCMalloc_PageHeap::ReturnedBytes() const { 1983 size_t result = 0; 1984 for (unsigned s = 0; s < kMaxPages; s++) { 1985 const int r_length = DLL_Length(&free_[s].returned); 1986 unsigned r_pages = s * r_length; 1987 result += r_pages << kPageShift; 1988 } 1989 1990 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) 1991 result += s->length << kPageShift; 1992 return result; 1993} 1994#endif 1995 1996#ifndef WTF_CHANGES 1997static double PagesToMB(uint64_t pages) { 1998 return (pages << kPageShift) / 1048576.0; 1999} 2000 2001void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) { 2002 int nonempty_sizes = 0; 2003 for (int s = 0; s < kMaxPages; s++) { 2004 if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) { 2005 nonempty_sizes++; 2006 } 2007 } 2008 out->printf("------------------------------------------------\n"); 2009 out->printf("PageHeap: %d sizes; %6.1f MB 
free\n", 2010 nonempty_sizes, PagesToMB(free_pages_)); 2011 out->printf("------------------------------------------------\n"); 2012 uint64_t total_normal = 0; 2013 uint64_t total_returned = 0; 2014 for (int s = 0; s < kMaxPages; s++) { 2015 const int n_length = DLL_Length(&free_[s].normal); 2016 const int r_length = DLL_Length(&free_[s].returned); 2017 if (n_length + r_length > 0) { 2018 uint64_t n_pages = s * n_length; 2019 uint64_t r_pages = s * r_length; 2020 total_normal += n_pages; 2021 total_returned += r_pages; 2022 out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum" 2023 "; unmapped: %6.1f MB; %6.1f MB cum\n", 2024 s, 2025 (n_length + r_length), 2026 PagesToMB(n_pages + r_pages), 2027 PagesToMB(total_normal + total_returned), 2028 PagesToMB(r_pages), 2029 PagesToMB(total_returned)); 2030 } 2031 } 2032 2033 uint64_t n_pages = 0; 2034 uint64_t r_pages = 0; 2035 int n_spans = 0; 2036 int r_spans = 0; 2037 out->printf("Normal large spans:\n"); 2038 for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) { 2039 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n", 2040 s->length, PagesToMB(s->length)); 2041 n_pages += s->length; 2042 n_spans++; 2043 } 2044 out->printf("Unmapped large spans:\n"); 2045 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) { 2046 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n", 2047 s->length, PagesToMB(s->length)); 2048 r_pages += s->length; 2049 r_spans++; 2050 } 2051 total_normal += n_pages; 2052 total_returned += r_pages; 2053 out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum" 2054 "; unmapped: %6.1f MB; %6.1f MB cum\n", 2055 (n_spans + r_spans), 2056 PagesToMB(n_pages + r_pages), 2057 PagesToMB(total_normal + total_returned), 2058 PagesToMB(r_pages), 2059 PagesToMB(total_returned)); 2060} 2061#endif 2062 2063bool TCMalloc_PageHeap::GrowHeap(Length n) { 2064 ASSERT(kMaxPages >= kMinSystemAlloc); 2065 if (n > kMaxValidPages) return false; 2066 Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc); 2067 size_t actual_size; 2068 void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); 2069 if (ptr == NULL) { 2070 if (n < ask) { 2071 // Try growing just "n" pages 2072 ask = n; 2073 ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); 2074 } 2075 if (ptr == NULL) return false; 2076 } 2077 ask = actual_size >> kPageShift; 2078 2079 uint64_t old_system_bytes = system_bytes_; 2080 system_bytes_ += (ask << kPageShift); 2081 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 2082 ASSERT(p > 0); 2083 2084 // If we have already a lot of pages allocated, just pre allocate a bunch of 2085 // memory for the page map. This prevents fragmentation by pagemap metadata 2086 // when a program keeps allocating and freeing large blocks. 2087 2088 if (old_system_bytes < kPageMapBigAllocationThreshold 2089 && system_bytes_ >= kPageMapBigAllocationThreshold) { 2090 pagemap_.PreallocateMoreMemory(); 2091 } 2092 2093 // Make sure pagemap_ has entries for all of the new pages. 2094 // Plus ensure one before and one after so coalescing code 2095 // does not need bounds-checking. 2096 if (pagemap_.Ensure(p-1, ask+2)) { 2097 // Pretend the new area is allocated and then Delete() it to 2098 // cause any necessary coalescing to occur. 2099 // 2100 // We do not adjust free_pages_ here since Delete() will do it for us. 
2101 Span* span = NewSpan(p, ask); 2102 RecordSpan(span); 2103 Delete(span); 2104 ASSERT(Check()); 2105 return true; 2106 } else { 2107 // We could not allocate memory within "pagemap_" 2108 // TODO: Once we can return memory to the system, return the new span 2109 return false; 2110 } 2111} 2112 2113bool TCMalloc_PageHeap::Check() { 2114 ASSERT(free_[0].normal.next == &free_[0].normal); 2115 ASSERT(free_[0].returned.next == &free_[0].returned); 2116 CheckList(&large_.normal, kMaxPages, 1000000000); 2117 CheckList(&large_.returned, kMaxPages, 1000000000); 2118 for (Length s = 1; s < kMaxPages; s++) { 2119 CheckList(&free_[s].normal, s, s); 2120 CheckList(&free_[s].returned, s, s); 2121 } 2122 return true; 2123} 2124 2125#if ASSERT_DISABLED 2126bool TCMalloc_PageHeap::CheckList(Span*, Length, Length) { 2127 return true; 2128} 2129#else 2130bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages) { 2131 for (Span* s = list->next; s != list; s = s->next) { 2132 CHECK_CONDITION(s->free); 2133 CHECK_CONDITION(s->length >= min_pages); 2134 CHECK_CONDITION(s->length <= max_pages); 2135 CHECK_CONDITION(GetDescriptor(s->start) == s); 2136 CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s); 2137 } 2138 return true; 2139} 2140#endif 2141 2142static void ReleaseFreeList(Span* list, Span* returned) { 2143 // Walk backwards through list so that when we push these 2144 // spans on the "returned" list, we preserve the order. 2145 while (!DLL_IsEmpty(list)) { 2146 Span* s = list->prev; 2147 DLL_Remove(s); 2148 DLL_Prepend(returned, s); 2149 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), 2150 static_cast<size_t>(s->length << kPageShift)); 2151 } 2152} 2153 2154void TCMalloc_PageHeap::ReleaseFreePages() { 2155 for (Length s = 0; s < kMaxPages; s++) { 2156 ReleaseFreeList(&free_[s].normal, &free_[s].returned); 2157 } 2158 ReleaseFreeList(&large_.normal, &large_.returned); 2159 ASSERT(Check()); 2160} 2161 2162//------------------------------------------------------------------- 2163// Free list 2164//------------------------------------------------------------------- 2165 2166class TCMalloc_ThreadCache_FreeList { 2167 private: 2168 void* list_; // Linked list of nodes 2169 uint16_t length_; // Current length 2170 uint16_t lowater_; // Low water mark for list length 2171 2172 public: 2173 void Init() { 2174 list_ = NULL; 2175 length_ = 0; 2176 lowater_ = 0; 2177 } 2178 2179 // Return current length of list 2180 int length() const { 2181 return length_; 2182 } 2183 2184 // Is list empty? 
2185 bool empty() const { 2186 return list_ == NULL; 2187 } 2188 2189 // Low-water mark management 2190 int lowwatermark() const { return lowater_; } 2191 void clear_lowwatermark() { lowater_ = length_; } 2192 2193 ALWAYS_INLINE void Push(void* ptr) { 2194 SLL_Push(&list_, ptr); 2195 length_++; 2196 } 2197 2198 void PushRange(int N, void *start, void *end) { 2199 SLL_PushRange(&list_, start, end); 2200 length_ = length_ + static_cast<uint16_t>(N); 2201 } 2202 2203 void PopRange(int N, void **start, void **end) { 2204 SLL_PopRange(&list_, N, start, end); 2205 ASSERT(length_ >= N); 2206 length_ = length_ - static_cast<uint16_t>(N); 2207 if (length_ < lowater_) lowater_ = length_; 2208 } 2209 2210 ALWAYS_INLINE void* Pop() { 2211 ASSERT(list_ != NULL); 2212 length_--; 2213 if (length_ < lowater_) lowater_ = length_; 2214 return SLL_Pop(&list_); 2215 } 2216 2217#ifdef WTF_CHANGES 2218 template <class Finder, class Reader> 2219 void enumerateFreeObjects(Finder& finder, const Reader& reader) 2220 { 2221 for (void* nextObject = list_; nextObject; nextObject = reader.nextEntryInLinkedList(reinterpret_cast<void**>(nextObject))) 2222 finder.visit(nextObject); 2223 } 2224#endif 2225}; 2226 2227//------------------------------------------------------------------- 2228// Data kept per thread 2229//------------------------------------------------------------------- 2230 2231class TCMalloc_ThreadCache { 2232 private: 2233 typedef TCMalloc_ThreadCache_FreeList FreeList; 2234#if OS(WINDOWS) 2235 typedef DWORD ThreadIdentifier; 2236#else 2237 typedef pthread_t ThreadIdentifier; 2238#endif 2239 2240 size_t size_; // Combined size of data 2241 ThreadIdentifier tid_; // Which thread owns it 2242 bool in_setspecific_; // Called pthread_setspecific? 2243 FreeList list_[kNumClasses]; // Array indexed by size-class 2244 2245 // We sample allocations, biased by the size of the allocation 2246 uint32_t rnd_; // Cheap random number generator 2247 size_t bytes_until_sample_; // Bytes until we sample next 2248 2249 // Allocate a new heap. REQUIRES: pageheap_lock is held. 2250 static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid); 2251 2252 // Use only as pthread thread-specific destructor function. 2253 static void DestroyThreadCache(void* ptr); 2254 public: 2255 // All ThreadCache objects are kept in a linked list (for stats collection) 2256 TCMalloc_ThreadCache* next_; 2257 TCMalloc_ThreadCache* prev_; 2258 2259 void Init(ThreadIdentifier tid); 2260 void Cleanup(); 2261 2262 // Accessors (mostly just for printing stats) 2263 int freelist_length(size_t cl) const { return list_[cl].length(); } 2264 2265 // Total byte size in cache 2266 size_t Size() const { return size_; } 2267 2268 ALWAYS_INLINE void* Allocate(size_t size); 2269 void Deallocate(void* ptr, size_t size_class); 2270 2271 ALWAYS_INLINE void FetchFromCentralCache(size_t cl, size_t allocationSize); 2272 void ReleaseToCentralCache(size_t cl, int N); 2273 void Scavenge(); 2274 void Print() const; 2275 2276 // Record allocation of "k" bytes. 
Return true iff allocation 2277 // should be sampled 2278 bool SampleAllocation(size_t k); 2279 2280 // Pick next sampling point 2281 void PickNextSample(size_t k); 2282 2283 static void InitModule(); 2284 static void InitTSD(); 2285 static TCMalloc_ThreadCache* GetThreadHeap(); 2286 static TCMalloc_ThreadCache* GetCache(); 2287 static TCMalloc_ThreadCache* GetCacheIfPresent(); 2288 static TCMalloc_ThreadCache* CreateCacheIfNecessary(); 2289 static void DeleteCache(TCMalloc_ThreadCache* heap); 2290 static void BecomeIdle(); 2291 static void RecomputeThreadCacheSize(); 2292 2293#ifdef WTF_CHANGES 2294 template <class Finder, class Reader> 2295 void enumerateFreeObjects(Finder& finder, const Reader& reader) 2296 { 2297 for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++) 2298 list_[sizeClass].enumerateFreeObjects(finder, reader); 2299 } 2300#endif 2301}; 2302 2303//------------------------------------------------------------------- 2304// Data kept per size-class in central cache 2305//------------------------------------------------------------------- 2306 2307class TCMalloc_Central_FreeList { 2308 public: 2309 void Init(size_t cl); 2310 2311 // These methods all do internal locking. 2312 2313 // Insert the specified range into the central freelist. N is the number of 2314 // elements in the range. 2315 void InsertRange(void *start, void *end, int N); 2316 2317 // Returns the actual number of fetched elements into N. 2318 void RemoveRange(void **start, void **end, int *N); 2319 2320 // Returns the number of free objects in cache. 2321 size_t length() { 2322 SpinLockHolder h(&lock_); 2323 return counter_; 2324 } 2325 2326 // Returns the number of free objects in the transfer cache. 2327 int tc_length() { 2328 SpinLockHolder h(&lock_); 2329 return used_slots_ * num_objects_to_move[size_class_]; 2330 } 2331 2332#ifdef WTF_CHANGES 2333 template <class Finder, class Reader> 2334 void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList) 2335 { 2336 for (Span* span = &empty_; span && span != &empty_; span = (span->next ? reader(span->next) : 0)) 2337 ASSERT(!span->objects); 2338 2339 ASSERT(!nonempty_.objects); 2340 static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this); 2341 2342 Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset); 2343 Span* remoteSpan = nonempty_.next; 2344 2345 for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->next, span = (span->next ? reader(span->next) : 0)) { 2346 for (void* nextObject = span->objects; nextObject; nextObject = reader.nextEntryInLinkedList(reinterpret_cast<void**>(nextObject))) 2347 finder.visit(nextObject); 2348 } 2349 } 2350#endif 2351 2352 private: 2353 // REQUIRES: lock_ is held 2354 // Remove object from cache and return. 2355 // Return NULL if no free entries in cache. 2356 void* FetchFromSpans(); 2357 2358 // REQUIRES: lock_ is held 2359 // Remove object from cache and return. Fetches 2360 // from pageheap if cache is empty. Only returns 2361 // NULL on allocation failure. 2362 void* FetchFromSpansSafe(); 2363 2364 // REQUIRES: lock_ is held 2365 // Release a linked list of objects to spans. 2366 // May temporarily release lock_. 2367 void ReleaseListToSpans(void *start); 2368 2369 // REQUIRES: lock_ is held 2370 // Release an object to spans. 2371 // May temporarily release lock_. 
2372 ALWAYS_INLINE void ReleaseToSpans(void* object); 2373 2374 // REQUIRES: lock_ is held 2375 // Populate cache by fetching from the page heap. 2376 // May temporarily release lock_. 2377 ALWAYS_INLINE void Populate(); 2378 2379 // REQUIRES: lock_ is held. 2380 // Tries to make room for a TCEntry. If the cache is full it will try to 2381 // expand it at the cost of some other cache size. Return false if there is 2382 // no space. 2383 bool MakeCacheSpace(); 2384 2385 // REQUIRES: lock_ for locked_size_class is held. 2386 // Picks a "random" size class to steal TCEntry slot from. In reality it 2387 // just iterates over the sizeclasses but does so without taking a lock. 2388 // Returns true on success. 2389 // May temporarily lock a "random" size class. 2390 static ALWAYS_INLINE bool EvictRandomSizeClass(size_t locked_size_class, bool force); 2391 2392 // REQUIRES: lock_ is *not* held. 2393 // Tries to shrink the cache. If force is true it will release objects to 2394 // spans if doing so allows it to shrink the cache. Return false if it failed to 2395 // shrink the cache. Decrements cache_size_ on success. 2396 // May temporarily take lock_. If it takes lock_, the locked_size_class 2397 // lock is released to keep the thread from holding two size class locks 2398 // concurrently, which could lead to a deadlock. 2399 bool ShrinkCache(int locked_size_class, bool force); 2400 2401 // This lock protects all the data members. used_slots_ and cache_size_ 2402 // may be looked at without holding the lock. 2403 SpinLock lock_; 2404 2405 // We keep linked lists of empty and non-empty spans. 2406 size_t size_class_; // My size class 2407 Span empty_; // Dummy header for list of empty spans 2408 Span nonempty_; // Dummy header for list of non-empty spans 2409 size_t counter_; // Number of free objects in cache entry 2410 2411 // Here we reserve space for TCEntry cache slots. Since one size class can 2412 // end up getting all the TCEntries quota in the system we just preallocate 2413 // a sufficient number of entries here. 2414 TCEntry tc_slots_[kNumTransferEntries]; 2415 2416 // Number of currently used cached entries in tc_slots_. This variable is 2417 // updated under a lock but can be read without one. 2418 int32_t used_slots_; 2419 // The current number of slots for this size class. This is an 2420 // adaptive value that is increased if there is lots of traffic 2421 // on a given size class. 2422 int32_t cache_size_; 2423}; 2424 2425// Pad each CentralCache object to a multiple of 64 bytes 2426class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList { 2427 private: 2428 char pad_[(64 - (sizeof(TCMalloc_Central_FreeList) % 64)) % 64]; 2429}; 2430 2431//------------------------------------------------------------------- 2432// Global variables 2433//------------------------------------------------------------------- 2434 2435// Central cache -- a collection of free-lists, one per size-class. 2436// We have a separate lock per free-list to reduce contention. 2437static TCMalloc_Central_FreeListPadded central_cache[kNumClasses]; 2438 2439// Page-level allocator 2440static SpinLock pageheap_lock = SPINLOCK_INITIALIZER; 2441static AllocAlignmentInteger pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(AllocAlignmentInteger) - 1) / sizeof(AllocAlignmentInteger)]; 2442static bool phinited = false; 2443 2444// Avoid an extra level of indirection by making "pageheap" be just an alias 2445// of pageheap_memory.
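// The pattern below -- static, suitably aligned raw storage that is reinterpreted
// as the object type once init() has run -- avoids both a heap allocation and a
// pointer indirection, and the union-based accessor sidesteps a cast that some
// compilers warn about under strict aliasing. A minimal sketch of the same idiom
// with a hypothetical type:
#if 0
struct Widget { int value; void init() { value = 0; } };

static AllocAlignmentInteger widgetMemory[(sizeof(Widget) + sizeof(AllocAlignmentInteger) - 1) / sizeof(AllocAlignmentInteger)];

static inline Widget* getWidget()
{
    union { void* memory; Widget* widget; } u = { &widgetMemory[0] };
    return u.widget;
}
// Callers must run getWidget()->init() exactly once before first use.
#endif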
2446typedef union { 2447 void* m_memory; 2448 TCMalloc_PageHeap* m_pageHeap; 2449} PageHeapUnion; 2450 2451static inline TCMalloc_PageHeap* getPageHeap() 2452{ 2453 PageHeapUnion u = { &pageheap_memory[0] }; 2454 return u.m_pageHeap; 2455} 2456 2457#define pageheap getPageHeap() 2458 2459#if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 2460 2461#if HAVE(DISPATCH_H) || OS(WINDOWS) 2462 2463void TCMalloc_PageHeap::periodicScavenge() 2464{ 2465 SpinLockHolder h(&pageheap_lock); 2466 pageheap->scavenge(); 2467 2468 if (shouldScavenge()) { 2469 rescheduleScavenger(); 2470 return; 2471 } 2472 2473 suspendScavenger(); 2474} 2475 2476ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger() 2477{ 2478 ASSERT(IsHeld(pageheap_lock)); 2479 if (isScavengerSuspended() && shouldScavenge()) 2480 scheduleScavenger(); 2481} 2482 2483#else 2484 2485void TCMalloc_PageHeap::scavengerThread() 2486{ 2487#if HAVE(PTHREAD_SETNAME_NP) 2488 pthread_setname_np("JavaScriptCore: FastMalloc scavenger"); 2489#endif 2490 2491 while (1) { 2492 if (!shouldScavenge()) { 2493 pthread_mutex_lock(&m_scavengeMutex); 2494 m_scavengeThreadActive = false; 2495 // Block until there are enough free committed pages to release back to the system. 2496 pthread_cond_wait(&m_scavengeCondition, &m_scavengeMutex); 2497 m_scavengeThreadActive = true; 2498 pthread_mutex_unlock(&m_scavengeMutex); 2499 } 2500 sleep(kScavengeDelayInSeconds); 2501 { 2502 SpinLockHolder h(&pageheap_lock); 2503 pageheap->scavenge(); 2504 } 2505 } 2506} 2507 2508#endif 2509 2510#endif 2511 2512// If TLS is available, we also store a copy 2513// of the per-thread object in a __thread variable 2514// since __thread variables are faster to read 2515// than pthread_getspecific(). We still need 2516// pthread_setspecific() because __thread 2517// variables provide no way to run cleanup 2518// code when a thread is destroyed. 2519#ifdef HAVE_TLS 2520static __thread TCMalloc_ThreadCache *threadlocal_heap; 2521#endif 2522// Thread-specific key. Initialization here is somewhat tricky 2523// because some Linux startup code invokes malloc() before it 2524// is in a good enough state to handle pthread_keycreate(). 2525// Therefore, we use TSD keys only after tsd_inited is set to true. 2526// Until then, we use a slow path to get the heap object. 2527static bool tsd_inited = false; 2528#if USE(PTHREAD_GETSPECIFIC_DIRECT) 2529static const pthread_key_t heap_key = __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0; 2530#else 2531static pthread_key_t heap_key; 2532#endif 2533#if OS(WINDOWS) 2534DWORD tlsIndex = TLS_OUT_OF_INDEXES; 2535#endif 2536 2537static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap) 2538{ 2539#if USE(PTHREAD_GETSPECIFIC_DIRECT) 2540 // Can't have two libraries both doing this in the same process, 2541 // so check and make this crash right away. 2542 if (pthread_getspecific(heap_key)) 2543 CRASH(); 2544#endif 2545 2546 // Still do pthread_setspecific even if there's an alternate form 2547 // of thread-local storage in use, to benefit from the delete callback. 2548 pthread_setspecific(heap_key, heap); 2549 2550#if OS(WINDOWS) 2551 TlsSetValue(tlsIndex, heap); 2552#endif 2553} 2554 2555// Allocator for thread heaps 2556static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator; 2557 2558// Linked list of heap objects. Protected by pageheap_lock. 2559static TCMalloc_ThreadCache* thread_heaps = NULL; 2560static int thread_heap_count = 0; 2561 2562// Overall thread cache size. Protected by pageheap_lock. 
2563static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize; 2564 2565// Global per-thread cache size. Writes are protected by 2566// pageheap_lock. Reads are done without any locking, which should be 2567// fine as long as size_t can be written atomically and we don't place 2568// invariants between this variable and other pieces of state. 2569static volatile size_t per_thread_cache_size = kMaxThreadCacheSize; 2570 2571//------------------------------------------------------------------- 2572// Central cache implementation 2573//------------------------------------------------------------------- 2574 2575void TCMalloc_Central_FreeList::Init(size_t cl) { 2576 lock_.Init(); 2577 size_class_ = cl; 2578 DLL_Init(&empty_); 2579 DLL_Init(&nonempty_); 2580 counter_ = 0; 2581 2582 cache_size_ = 1; 2583 used_slots_ = 0; 2584 ASSERT(cache_size_ <= kNumTransferEntries); 2585} 2586 2587void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start) { 2588 while (start) { 2589 void *next = SLL_Next(start); 2590 ReleaseToSpans(start); 2591 start = next; 2592 } 2593} 2594 2595ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) { 2596 const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift; 2597 Span* span = pageheap->GetDescriptor(p); 2598 ASSERT(span != NULL); 2599 ASSERT(span->refcount > 0); 2600 2601 // If span is empty, move it to non-empty list 2602 if (span->objects == NULL) { 2603 DLL_Remove(span); 2604 DLL_Prepend(&nonempty_, span); 2605 Event(span, 'N', 0); 2606 } 2607 2608 // The following check is expensive, so it is disabled by default 2609 if (false) { 2610 // Check that object does not occur in list 2611 unsigned got = 0; 2612 for (void* p = span->objects; p != NULL; p = *((void**) p)) { 2613 ASSERT(p != object); 2614 got++; 2615 } 2616 ASSERT(got + span->refcount == 2617 (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass)); 2618 } 2619 2620 counter_++; 2621 span->refcount--; 2622 if (span->refcount == 0) { 2623 Event(span, '#', 0); 2624 counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass); 2625 DLL_Remove(span); 2626 2627 // Release central list lock while operating on pageheap 2628 lock_.Unlock(); 2629 { 2630 SpinLockHolder h(&pageheap_lock); 2631 pageheap->Delete(span); 2632 } 2633 lock_.Lock(); 2634 } else { 2635 *(reinterpret_cast<void**>(object)) = span->objects; 2636 span->objects = object; 2637 } 2638} 2639 2640ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass( 2641 size_t locked_size_class, bool force) { 2642 static int race_counter = 0; 2643 int t = race_counter++; // Updated without a lock, but who cares. 2644 if (t >= static_cast<int>(kNumClasses)) { 2645 while (t >= static_cast<int>(kNumClasses)) { 2646 t -= kNumClasses; 2647 } 2648 race_counter = t; 2649 } 2650 ASSERT(t >= 0); 2651 ASSERT(t < static_cast<int>(kNumClasses)); 2652 if (t == static_cast<int>(locked_size_class)) return false; 2653 return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force); 2654} 2655 2656bool TCMalloc_Central_FreeList::MakeCacheSpace() { 2657 // Is there room in the cache? 2658 if (used_slots_ < cache_size_) return true; 2659 // Check if we can expand this cache? 2660 if (cache_size_ == kNumTransferEntries) return false; 2661 // Ok, we'll try to grab an entry from some other size class. 2662 if (EvictRandomSizeClass(size_class_, false) || 2663 EvictRandomSizeClass(size_class_, true)) { 2664 // Succeeded in evicting, we're going to make our cache larger. 
2665 cache_size_++; 2666 return true; 2667 } 2668 return false; 2669} 2670 2671 2672namespace { 2673class LockInverter { 2674 private: 2675 SpinLock *held_, *temp_; 2676 public: 2677 inline explicit LockInverter(SpinLock* held, SpinLock *temp) 2678 : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); } 2679 inline ~LockInverter() { temp_->Unlock(); held_->Lock(); } 2680}; 2681} 2682 2683bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) { 2684 // Start with a quick check without taking a lock. 2685 if (cache_size_ == 0) return false; 2686 // We don't evict from a full cache unless we are 'forcing'. 2687 if (force == false && used_slots_ == cache_size_) return false; 2688 2689 // Grab lock, but first release the other lock held by this thread. We use 2690 // the lock inverter to ensure that we never hold two size class locks 2691 // concurrently. That can create a deadlock because there is no well 2692 // defined nesting order. 2693 LockInverter li(&central_cache[locked_size_class].lock_, &lock_); 2694 ASSERT(used_slots_ <= cache_size_); 2695 ASSERT(0 <= cache_size_); 2696 if (cache_size_ == 0) return false; 2697 if (used_slots_ == cache_size_) { 2698 if (force == false) return false; 2699 // ReleaseListToSpans releases the lock, so we have to make all the 2700 // updates to the central list before calling it. 2701 cache_size_--; 2702 used_slots_--; 2703 ReleaseListToSpans(tc_slots_[used_slots_].head); 2704 return true; 2705 } 2706 cache_size_--; 2707 return true; 2708} 2709 2710void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) { 2711 SpinLockHolder h(&lock_); 2712 if (N == num_objects_to_move[size_class_] && 2713 MakeCacheSpace()) { 2714 int slot = used_slots_++; 2715 ASSERT(slot >=0); 2716 ASSERT(slot < kNumTransferEntries); 2717 TCEntry *entry = &tc_slots_[slot]; 2718 entry->head = start; 2719 entry->tail = end; 2720 return; 2721 } 2722 ReleaseListToSpans(start); 2723} 2724 2725void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) { 2726 int num = *N; 2727 ASSERT(num > 0); 2728 2729 SpinLockHolder h(&lock_); 2730 if (num == num_objects_to_move[size_class_] && used_slots_ > 0) { 2731 int slot = --used_slots_; 2732 ASSERT(slot >= 0); 2733 TCEntry *entry = &tc_slots_[slot]; 2734 *start = entry->head; 2735 *end = entry->tail; 2736 return; 2737 } 2738 2739 // TODO: Prefetch multiple TCEntries? 2740 void *tail = FetchFromSpansSafe(); 2741 if (!tail) { 2742 // We are completely out of memory.
2743 *start = *end = NULL; 2744 *N = 0; 2745 return; 2746 } 2747 2748 SLL_SetNext(tail, NULL); 2749 void *head = tail; 2750 int count = 1; 2751 while (count < num) { 2752 void *t = FetchFromSpans(); 2753 if (!t) break; 2754 SLL_Push(&head, t); 2755 count++; 2756 } 2757 *start = head; 2758 *end = tail; 2759 *N = count; 2760} 2761 2762 2763void* TCMalloc_Central_FreeList::FetchFromSpansSafe() { 2764 void *t = FetchFromSpans(); 2765 if (!t) { 2766 Populate(); 2767 t = FetchFromSpans(); 2768 } 2769 return t; 2770} 2771 2772void* TCMalloc_Central_FreeList::FetchFromSpans() { 2773 if (DLL_IsEmpty(&nonempty_)) return NULL; 2774 Span* span = nonempty_.next; 2775 2776 ASSERT(span->objects != NULL); 2777 ASSERT_SPAN_COMMITTED(span); 2778 span->refcount++; 2779 void* result = span->objects; 2780 span->objects = *(reinterpret_cast<void**>(result)); 2781 if (span->objects == NULL) { 2782 // Move to empty list 2783 DLL_Remove(span); 2784 DLL_Prepend(&empty_, span); 2785 Event(span, 'E', 0); 2786 } 2787 counter_--; 2788 return result; 2789} 2790 2791// Fetch memory from the system and add to the central cache freelist. 2792ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() { 2793 // Release central list lock while operating on pageheap 2794 lock_.Unlock(); 2795 const size_t npages = class_to_pages[size_class_]; 2796 2797 Span* span; 2798 { 2799 SpinLockHolder h(&pageheap_lock); 2800 span = pageheap->New(npages); 2801 if (span) pageheap->RegisterSizeClass(span, size_class_); 2802 } 2803 if (span == NULL) { 2804#if HAVE(ERRNO_H) 2805 MESSAGE("allocation failed: %d\n", errno); 2806#elif OS(WINDOWS) 2807 MESSAGE("allocation failed: %d\n", ::GetLastError()); 2808#else 2809 MESSAGE("allocation failed\n"); 2810#endif 2811 lock_.Lock(); 2812 return; 2813 } 2814 ASSERT_SPAN_COMMITTED(span); 2815 ASSERT(span->length == npages); 2816 // Cache sizeclass info eagerly. Locking is not necessary. 2817 // (Instead of being eager, we could just replace any stale info 2818 // about this span, but that seems to be no better in practice.) 2819 for (size_t i = 0; i < npages; i++) { 2820 pageheap->CacheSizeClass(span->start + i, size_class_); 2821 } 2822 2823 // Split the block into pieces and add to the free-list 2824 // TODO: coloring of objects to avoid cache conflicts? 
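// The loop below threads a singly linked free list through the span's own memory:
// the first word of each object stores the pointer to the next object. The same
// construction over a plain buffer, as a standalone sketch (hypothetical names,
// not part of the allocator):
#if 0
// Builds a NULL-terminated free list inside [buffer, buffer + bytes), one node
// every objectSize bytes (objectSize must be at least sizeof(void*)), and
// returns the head of the list.
static void* buildFreeList(char* buffer, size_t bytes, size_t objectSize)
{
    void* head = NULL;
    void** tail = &head;
    char* ptr = buffer;
    char* limit = buffer + bytes;
    while (ptr + objectSize <= limit) {
        *tail = ptr;                          // link the previous node to this one
        tail = reinterpret_cast<void**>(ptr); // this node's first word holds the next link
        ptr += objectSize;
    }
    *tail = NULL;                             // terminate the list
    return head;
}
#endif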
2825 void** tail = &span->objects; 2826 char* ptr = reinterpret_cast<char*>(span->start << kPageShift); 2827 char* limit = ptr + (npages << kPageShift); 2828 const size_t size = ByteSizeForClass(size_class_); 2829 int num = 0; 2830 char* nptr; 2831 while ((nptr = ptr + size) <= limit) { 2832 *tail = ptr; 2833 tail = reinterpret_cast_ptr<void**>(ptr); 2834 ptr = nptr; 2835 num++; 2836 } 2837 ASSERT(ptr <= limit); 2838 *tail = NULL; 2839 span->refcount = 0; // No sub-object in use yet 2840 2841 // Add span to list of non-empty spans 2842 lock_.Lock(); 2843 DLL_Prepend(&nonempty_, span); 2844 counter_ += num; 2845} 2846 2847//------------------------------------------------------------------- 2848// TCMalloc_ThreadCache implementation 2849//------------------------------------------------------------------- 2850 2851inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) { 2852 if (bytes_until_sample_ < k) { 2853 PickNextSample(k); 2854 return true; 2855 } else { 2856 bytes_until_sample_ -= k; 2857 return false; 2858 } 2859} 2860 2861void TCMalloc_ThreadCache::Init(ThreadIdentifier tid) { 2862 size_ = 0; 2863 next_ = NULL; 2864 prev_ = NULL; 2865 tid_ = tid; 2866 in_setspecific_ = false; 2867 for (size_t cl = 0; cl < kNumClasses; ++cl) { 2868 list_[cl].Init(); 2869 } 2870 2871 // Initialize RNG -- run it for a bit to get to good values 2872 bytes_until_sample_ = 0; 2873 rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this)); 2874 for (int i = 0; i < 100; i++) { 2875 PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2)); 2876 } 2877} 2878 2879void TCMalloc_ThreadCache::Cleanup() { 2880 // Put unused memory back into central cache 2881 for (size_t cl = 0; cl < kNumClasses; ++cl) { 2882 if (list_[cl].length() > 0) { 2883 ReleaseToCentralCache(cl, list_[cl].length()); 2884 } 2885 } 2886} 2887 2888ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) { 2889 ASSERT(size <= kMaxSize); 2890 const size_t cl = SizeClass(size); 2891 FreeList* list = &list_[cl]; 2892 size_t allocationSize = ByteSizeForClass(cl); 2893 if (list->empty()) { 2894 FetchFromCentralCache(cl, allocationSize); 2895 if (list->empty()) return NULL; 2896 } 2897 size_ -= allocationSize; 2898 return list->Pop(); 2899} 2900 2901inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) { 2902 size_ += ByteSizeForClass(cl); 2903 FreeList* list = &list_[cl]; 2904 list->Push(ptr); 2905 // If enough data is free, put back into central cache 2906 if (list->length() > kMaxFreeListLength) { 2907 ReleaseToCentralCache(cl, num_objects_to_move[cl]); 2908 } 2909 if (size_ >= per_thread_cache_size) Scavenge(); 2910} 2911 2912// Remove some objects of class "cl" from central cache and add to thread heap 2913ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) { 2914 int fetch_count = num_objects_to_move[cl]; 2915 void *start, *end; 2916 central_cache[cl].RemoveRange(&start, &end, &fetch_count); 2917 list_[cl].PushRange(fetch_count, start, end); 2918 size_ += allocationSize * fetch_count; 2919} 2920 2921// Remove some objects of class "cl" from thread heap and add to central cache 2922inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) { 2923 ASSERT(N > 0); 2924 FreeList* src = &list_[cl]; 2925 if (N > src->length()) N = src->length(); 2926 size_ -= N*ByteSizeForClass(cl); 2927 2928 // We return prepackaged chains of the correct size to the central cache. 2929 // TODO: Use the same format internally in the thread caches? 
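// For example, with num_objects_to_move[cl] == 32 and N == 70, the loop below
// hands back two full batches of 32 (eligible for the central transfer cache)
// followed by a final chain of 6, which InsertRange() sends straight to spans.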
2930 int batch_size = num_objects_to_move[cl]; 2931 while (N > batch_size) { 2932 void *tail, *head; 2933 src->PopRange(batch_size, &head, &tail); 2934 central_cache[cl].InsertRange(head, tail, batch_size); 2935 N -= batch_size; 2936 } 2937 void *tail, *head; 2938 src->PopRange(N, &head, &tail); 2939 central_cache[cl].InsertRange(head, tail, N); 2940} 2941 2942// Release idle memory to the central cache 2943inline void TCMalloc_ThreadCache::Scavenge() { 2944 // If the low-water mark for the free list is L, it means we would 2945 // not have had to allocate anything from the central cache even if 2946 // we had reduced the free list size by L. We aim to get closer to 2947 // that situation by dropping L/2 nodes from the free list. This 2948 // may not release much memory, but if so we will call scavenge again 2949 // pretty soon and the low-water marks will be high on that call. 2950 //int64 start = CycleClock::Now(); 2951 2952 for (size_t cl = 0; cl < kNumClasses; cl++) { 2953 FreeList* list = &list_[cl]; 2954 const int lowmark = list->lowwatermark(); 2955 if (lowmark > 0) { 2956 const int drop = (lowmark > 1) ? lowmark/2 : 1; 2957 ReleaseToCentralCache(cl, drop); 2958 } 2959 list->clear_lowwatermark(); 2960 } 2961 2962 //int64 finish = CycleClock::Now(); 2963 //CycleTimer ct; 2964 //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0); 2965} 2966 2967void TCMalloc_ThreadCache::PickNextSample(size_t k) { 2968 // Make next "random" number 2969 // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers 2970 static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0); 2971 uint32_t r = rnd_; 2972 rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly); 2973 2974 // Next point is "rnd_ % (sample_period)". I.e., average 2975 // increment is "sample_period/2". 2976 const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter); 2977 static int last_flag_value = -1; 2978 2979 if (flag_value != last_flag_value) { 2980 SpinLockHolder h(&sample_period_lock); 2981 int i; 2982 for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) { 2983 if (primes_list[i] >= flag_value) { 2984 break; 2985 } 2986 } 2987 sample_period = primes_list[i]; 2988 last_flag_value = flag_value; 2989 } 2990 2991 bytes_until_sample_ += rnd_ % sample_period; 2992 2993 if (k > (static_cast<size_t>(-1) >> 2)) { 2994 // If the user has asked for a huge allocation then it is possible 2995 // for the code below to loop infinitely. Just return (note that 2996 // this throws off the sampling accuracy somewhat, but a user who 2997 // is allocating more than 1G of memory at a time can live with a 2998 // minor inaccuracy in profiling of small allocations, and also 2999 // would rather not wait for the loop below to terminate). 3000 return; 3001 } 3002 3003 while (bytes_until_sample_ < k) { 3004 // Increase bytes_until_sample_ by enough average sampling periods 3005 // (sample_period >> 1) to allow us to sample past the current 3006 // allocation. 3007 bytes_until_sample_ += (sample_period >> 1); 3008 } 3009 3010 bytes_until_sample_ -= k; 3011} 3012 3013void TCMalloc_ThreadCache::InitModule() { 3014 // There is a slight potential race here because of double-checked 3015 // locking idiom. However, as long as the program does a small 3016 // allocation before switching to multi-threaded mode, we will be 3017 // fine. 
We increase the chances of doing such a small allocation 3018 // by doing one in the constructor of the module_enter_exit_hook 3019 // object declared below. 3020 SpinLockHolder h(&pageheap_lock); 3021 if (!phinited) { 3022#ifdef WTF_CHANGES 3023 InitTSD(); 3024#endif 3025 InitSizeClasses(); 3026 threadheap_allocator.Init(); 3027 span_allocator.Init(); 3028 span_allocator.New(); // Reduce cache conflicts 3029 span_allocator.New(); // Reduce cache conflicts 3030 stacktrace_allocator.Init(); 3031 DLL_Init(&sampled_objects); 3032 for (size_t i = 0; i < kNumClasses; ++i) { 3033 central_cache[i].Init(i); 3034 } 3035 pageheap->init(); 3036 phinited = 1; 3037#if defined(WTF_CHANGES) && OS(DARWIN) 3038 FastMallocZone::init(); 3039#endif 3040 } 3041} 3042 3043inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid) { 3044 // Create the heap and add it to the linked list 3045 TCMalloc_ThreadCache *heap = threadheap_allocator.New(); 3046 heap->Init(tid); 3047 heap->next_ = thread_heaps; 3048 heap->prev_ = NULL; 3049 if (thread_heaps != NULL) thread_heaps->prev_ = heap; 3050 thread_heaps = heap; 3051 thread_heap_count++; 3052 RecomputeThreadCacheSize(); 3053 return heap; 3054} 3055 3056inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() { 3057#ifdef HAVE_TLS 3058 // __thread is faster, but only when the kernel supports it 3059 if (KernelSupportsTLS()) 3060 return threadlocal_heap; 3061#elif OS(WINDOWS) 3062 return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex)); 3063#else 3064 return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key)); 3065#endif 3066} 3067 3068inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() { 3069 TCMalloc_ThreadCache* ptr = NULL; 3070 if (!tsd_inited) { 3071 InitModule(); 3072 } else { 3073 ptr = GetThreadHeap(); 3074 } 3075 if (ptr == NULL) ptr = CreateCacheIfNecessary(); 3076 return ptr; 3077} 3078 3079// In deletion paths, we do not try to create a thread-cache. This is 3080// because we may be in the thread destruction code and may have 3081// already cleaned up the cache for this thread. 3082inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() { 3083 if (!tsd_inited) return NULL; 3084 void* const p = GetThreadHeap(); 3085 return reinterpret_cast<TCMalloc_ThreadCache*>(p); 3086} 3087 3088void TCMalloc_ThreadCache::InitTSD() { 3089 ASSERT(!tsd_inited); 3090#if USE(PTHREAD_GETSPECIFIC_DIRECT) 3091 pthread_key_init_np(heap_key, DestroyThreadCache); 3092#else 3093 pthread_key_create(&heap_key, DestroyThreadCache); 3094#endif 3095#if OS(WINDOWS) 3096 tlsIndex = TlsAlloc(); 3097#endif 3098 tsd_inited = true; 3099 3100#if !OS(WINDOWS) 3101 // We may have used a fake pthread_t for the main thread. Fix it. 
3102 pthread_t zero; 3103 memset(&zero, 0, sizeof(zero)); 3104#endif 3105#ifndef WTF_CHANGES 3106 SpinLockHolder h(&pageheap_lock); 3107#else 3108 ASSERT(pageheap_lock.IsHeld()); 3109#endif 3110 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { 3111#if OS(WINDOWS) 3112 if (h->tid_ == 0) { 3113 h->tid_ = GetCurrentThreadId(); 3114 } 3115#else 3116 if (pthread_equal(h->tid_, zero)) { 3117 h->tid_ = pthread_self(); 3118 } 3119#endif 3120 } 3121} 3122 3123TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() { 3124 // Initialize per-thread data if necessary 3125 TCMalloc_ThreadCache* heap = NULL; 3126 { 3127 SpinLockHolder h(&pageheap_lock); 3128 3129#if OS(WINDOWS) 3130 DWORD me; 3131 if (!tsd_inited) { 3132 me = 0; 3133 } else { 3134 me = GetCurrentThreadId(); 3135 } 3136#else 3137 // Early on in glibc's life, we cannot even call pthread_self() 3138 pthread_t me; 3139 if (!tsd_inited) { 3140 memset(&me, 0, sizeof(me)); 3141 } else { 3142 me = pthread_self(); 3143 } 3144#endif 3145 3146 // This may be a recursive malloc call from pthread_setspecific() 3147 // In that case, the heap for this thread has already been created 3148 // and added to the linked list. So we search for that first. 3149 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { 3150#if OS(WINDOWS) 3151 if (h->tid_ == me) { 3152#else 3153 if (pthread_equal(h->tid_, me)) { 3154#endif 3155 heap = h; 3156 break; 3157 } 3158 } 3159 3160 if (heap == NULL) heap = NewHeap(me); 3161 } 3162 3163 // We call pthread_setspecific() outside the lock because it may 3164 // call malloc() recursively. The recursive call will never get 3165 // here again because it will find the already allocated heap in the 3166 // linked list of heaps. 3167 if (!heap->in_setspecific_ && tsd_inited) { 3168 heap->in_setspecific_ = true; 3169 setThreadHeap(heap); 3170 } 3171 return heap; 3172} 3173 3174void TCMalloc_ThreadCache::BecomeIdle() { 3175 if (!tsd_inited) return; // No caches yet 3176 TCMalloc_ThreadCache* heap = GetThreadHeap(); 3177 if (heap == NULL) return; // No thread cache to remove 3178 if (heap->in_setspecific_) return; // Do not disturb the active caller 3179 3180 heap->in_setspecific_ = true; 3181 setThreadHeap(NULL); 3182#ifdef HAVE_TLS 3183 // Also update the copy in __thread 3184 threadlocal_heap = NULL; 3185#endif 3186 heap->in_setspecific_ = false; 3187 if (GetThreadHeap() == heap) { 3188 // Somehow heap got reinstated by a recursive call to malloc 3189 // from pthread_setspecific. We give up in this case. 3190 return; 3191 } 3192 3193 // We can now get rid of the heap 3194 DeleteCache(heap); 3195} 3196 3197void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) { 3198 // Note that "ptr" cannot be NULL since pthread promises not 3199 // to invoke the destructor on NULL values, but for safety, 3200 // we check anyway. 3201 if (ptr == NULL) return; 3202#ifdef HAVE_TLS 3203 // Prevent fast path of GetThreadHeap() from returning heap. 
3204 threadlocal_heap = NULL; 3205#endif 3206 DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr)); 3207} 3208 3209void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) { 3210 // Remove all memory from heap 3211 heap->Cleanup(); 3212 3213 // Remove from linked list 3214 SpinLockHolder h(&pageheap_lock); 3215 if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_; 3216 if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_; 3217 if (thread_heaps == heap) thread_heaps = heap->next_; 3218 thread_heap_count--; 3219 RecomputeThreadCacheSize(); 3220 3221 threadheap_allocator.Delete(heap); 3222} 3223 3224void TCMalloc_ThreadCache::RecomputeThreadCacheSize() { 3225 // Divide available space across threads 3226 int n = thread_heap_count > 0 ? thread_heap_count : 1; 3227 size_t space = overall_thread_cache_size / n; 3228 3229 // Limit to allowed range 3230 if (space < kMinThreadCacheSize) space = kMinThreadCacheSize; 3231 if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize; 3232 3233 per_thread_cache_size = space; 3234} 3235 3236void TCMalloc_ThreadCache::Print() const { 3237 for (size_t cl = 0; cl < kNumClasses; ++cl) { 3238 MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n", 3239 ByteSizeForClass(cl), 3240 list_[cl].length(), 3241 list_[cl].lowwatermark()); 3242 } 3243} 3244 3245// Extract interesting stats 3246struct TCMallocStats { 3247 uint64_t system_bytes; // Bytes alloced from system 3248 uint64_t thread_bytes; // Bytes in thread caches 3249 uint64_t central_bytes; // Bytes in central cache 3250 uint64_t transfer_bytes; // Bytes in central transfer cache 3251 uint64_t pageheap_bytes; // Bytes in page heap 3252 uint64_t metadata_bytes; // Bytes alloced for metadata 3253}; 3254 3255#ifndef WTF_CHANGES 3256// Get stats into "r". Also get per-size-class counts if class_count != NULL 3257static void ExtractStats(TCMallocStats* r, uint64_t* class_count) { 3258 r->central_bytes = 0; 3259 r->transfer_bytes = 0; 3260 for (int cl = 0; cl < kNumClasses; ++cl) { 3261 const int length = central_cache[cl].length(); 3262 const int tc_length = central_cache[cl].tc_length(); 3263 r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length; 3264 r->transfer_bytes += 3265 static_cast<uint64_t>(ByteSizeForClass(cl)) * tc_length; 3266 if (class_count) class_count[cl] = length + tc_length; 3267 } 3268 3269 // Add stats from per-thread heaps 3270 r->thread_bytes = 0; 3271 { // scope 3272 SpinLockHolder h(&pageheap_lock); 3273 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { 3274 r->thread_bytes += h->Size(); 3275 if (class_count) { 3276 for (size_t cl = 0; cl < kNumClasses; ++cl) { 3277 class_count[cl] += h->freelist_length(cl); 3278 } 3279 } 3280 } 3281 } 3282 3283 { //scope 3284 SpinLockHolder h(&pageheap_lock); 3285 r->system_bytes = pageheap->SystemBytes(); 3286 r->metadata_bytes = metadata_system_bytes; 3287 r->pageheap_bytes = pageheap->FreeBytes(); 3288 } 3289} 3290#endif 3291 3292#ifndef WTF_CHANGES 3293// WRITE stats to "out" 3294static void DumpStats(TCMalloc_Printer* out, int level) { 3295 TCMallocStats stats; 3296 uint64_t class_count[kNumClasses]; 3297 ExtractStats(&stats, (level >= 2 ? 
class_count : NULL)); 3298 3299 if (level >= 2) { 3300 out->printf("------------------------------------------------\n"); 3301 uint64_t cumulative = 0; 3302 for (int cl = 0; cl < kNumClasses; ++cl) { 3303 if (class_count[cl] > 0) { 3304 uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl); 3305 cumulative += class_bytes; 3306 out->printf("class %3d [ %8" PRIuS " bytes ] : " 3307 "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n", 3308 cl, ByteSizeForClass(cl), 3309 class_count[cl], 3310 class_bytes / 1048576.0, 3311 cumulative / 1048576.0); 3312 } 3313 } 3314 3315 SpinLockHolder h(&pageheap_lock); 3316 pageheap->Dump(out); 3317 } 3318 3319 const uint64_t bytes_in_use = stats.system_bytes 3320 - stats.pageheap_bytes 3321 - stats.central_bytes 3322 - stats.transfer_bytes 3323 - stats.thread_bytes; 3324 3325 out->printf("------------------------------------------------\n" 3326 "MALLOC: %12" PRIu64 " Heap size\n" 3327 "MALLOC: %12" PRIu64 " Bytes in use by application\n" 3328 "MALLOC: %12" PRIu64 " Bytes free in page heap\n" 3329 "MALLOC: %12" PRIu64 " Bytes free in central cache\n" 3330 "MALLOC: %12" PRIu64 " Bytes free in transfer cache\n" 3331 "MALLOC: %12" PRIu64 " Bytes free in thread caches\n" 3332 "MALLOC: %12" PRIu64 " Spans in use\n" 3333 "MALLOC: %12" PRIu64 " Thread heaps in use\n" 3334 "MALLOC: %12" PRIu64 " Metadata allocated\n" 3335 "------------------------------------------------\n", 3336 stats.system_bytes, 3337 bytes_in_use, 3338 stats.pageheap_bytes, 3339 stats.central_bytes, 3340 stats.transfer_bytes, 3341 stats.thread_bytes, 3342 uint64_t(span_allocator.inuse()), 3343 uint64_t(threadheap_allocator.inuse()), 3344 stats.metadata_bytes); 3345} 3346 3347static void PrintStats(int level) { 3348 const int kBufferSize = 16 << 10; 3349 char* buffer = new char[kBufferSize]; 3350 TCMalloc_Printer printer(buffer, kBufferSize); 3351 DumpStats(&printer, level); 3352 write(STDERR_FILENO, buffer, strlen(buffer)); 3353 delete[] buffer; 3354} 3355 3356static void** DumpStackTraces() { 3357 // Count how much space we need 3358 int needed_slots = 0; 3359 { 3360 SpinLockHolder h(&pageheap_lock); 3361 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) { 3362 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects); 3363 needed_slots += 3 + stack->depth; 3364 } 3365 needed_slots += 100; // Slop in case sample grows 3366 needed_slots += needed_slots/8; // An extra 12.5% slop 3367 } 3368 3369 void** result = new void*[needed_slots]; 3370 if (result == NULL) { 3371 MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n", 3372 needed_slots); 3373 return NULL; 3374 } 3375 3376 SpinLockHolder h(&pageheap_lock); 3377 int used_slots = 0; 3378 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) { 3379 ASSERT(used_slots < needed_slots); // Need to leave room for terminator 3380 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects); 3381 if (used_slots + 3 + stack->depth >= needed_slots) { 3382 // No more room 3383 break; 3384 } 3385 3386 result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1)); 3387 result[used_slots+1] = reinterpret_cast<void*>(stack->size); 3388 result[used_slots+2] = reinterpret_cast<void*>(stack->depth); 3389 for (int d = 0; d < stack->depth; d++) { 3390 result[used_slots+3+d] = stack->stack[d]; 3391 } 3392 used_slots += 3 + stack->depth; 3393 } 3394 result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0)); 3395 return result; 3396} 3397#endif 3398 3399#ifndef WTF_CHANGES 3400 
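// The class that follows implements TCMalloc's extended MallocExtension hooks;
// clients reach it through the registry (see the MallocExtension::Register() call
// in the TCMallocGuard constructor below). A usage sketch, assuming the standard
// perftools accessor MallocExtension::instance():
#if 0
static void reportHeapSize()
{
    size_t heapSize = 0;
    if (MallocExtension::instance()->GetNumericProperty("generic.heap_size", &heapSize))
        MESSAGE("heap size: %lu bytes\n", static_cast<unsigned long>(heapSize));
}
#endif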
3401// TCMalloc's support for extra malloc interfaces 3402class TCMallocImplementation : public MallocExtension { 3403 public: 3404 virtual void GetStats(char* buffer, int buffer_length) { 3405 ASSERT(buffer_length > 0); 3406 TCMalloc_Printer printer(buffer, buffer_length); 3407 3408 // Print level one stats unless lots of space is available 3409 if (buffer_length < 10000) { 3410 DumpStats(&printer, 1); 3411 } else { 3412 DumpStats(&printer, 2); 3413 } 3414 } 3415 3416 virtual void** ReadStackTraces() { 3417 return DumpStackTraces(); 3418 } 3419 3420 virtual bool GetNumericProperty(const char* name, size_t* value) { 3421 ASSERT(name != NULL); 3422 3423 if (strcmp(name, "generic.current_allocated_bytes") == 0) { 3424 TCMallocStats stats; 3425 ExtractStats(&stats, NULL); 3426 *value = stats.system_bytes 3427 - stats.thread_bytes 3428 - stats.central_bytes 3429 - stats.pageheap_bytes; 3430 return true; 3431 } 3432 3433 if (strcmp(name, "generic.heap_size") == 0) { 3434 TCMallocStats stats; 3435 ExtractStats(&stats, NULL); 3436 *value = stats.system_bytes; 3437 return true; 3438 } 3439 3440 if (strcmp(name, "tcmalloc.slack_bytes") == 0) { 3441 // We assume that bytes in the page heap are not fragmented too 3442 // badly, and are therefore available for allocation. 3443 SpinLockHolder l(&pageheap_lock); 3444 *value = pageheap->FreeBytes(); 3445 return true; 3446 } 3447 3448 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { 3449 SpinLockHolder l(&pageheap_lock); 3450 *value = overall_thread_cache_size; 3451 return true; 3452 } 3453 3454 if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) { 3455 TCMallocStats stats; 3456 ExtractStats(&stats, NULL); 3457 *value = stats.thread_bytes; 3458 return true; 3459 } 3460 3461 return false; 3462 } 3463 3464 virtual bool SetNumericProperty(const char* name, size_t value) { 3465 ASSERT(name != NULL); 3466 3467 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { 3468 // Clip the value to a reasonable range 3469 if (value < kMinThreadCacheSize) value = kMinThreadCacheSize; 3470 if (value > (1<<30)) value = (1<<30); // Limit to 1GB 3471 3472 SpinLockHolder l(&pageheap_lock); 3473 overall_thread_cache_size = static_cast<size_t>(value); 3474 TCMalloc_ThreadCache::RecomputeThreadCacheSize(); 3475 return true; 3476 } 3477 3478 return false; 3479 } 3480 3481 virtual void MarkThreadIdle() { 3482 TCMalloc_ThreadCache::BecomeIdle(); 3483 } 3484 3485 virtual void ReleaseFreeMemory() { 3486 SpinLockHolder h(&pageheap_lock); 3487 pageheap->ReleaseFreePages(); 3488 } 3489}; 3490#endif 3491 3492// The constructor allocates an object to ensure that initialization 3493// runs before main(), and therefore we do not have a chance to become 3494// multi-threaded before initialization. We also create the TSD key 3495// here. Presumably by the time this constructor runs, glibc is in 3496// good enough shape to handle pthread_key_create(). 3497// 3498// The constructor also takes the opportunity to tell STL to use 3499// tcmalloc. We want to do this early, before construct time, so 3500// all user STL allocations go through tcmalloc (which works really 3501// well for STL). 3502// 3503// The destructor prints stats when the program exits. 
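//
// A rough sketch of the intended lifetime (outside WTF_CHANGES builds, where
// the guard is instantiated as the file-scope static module_enter_exit_hook
// defined further below):
//
//   static TCMallocGuard module_enter_exit_hook;  // constructor runs before main()
//   int main() { /* all allocations now go through tcmalloc */ }
//   // At exit, ~TCMallocGuard() prints stats when MALLOCSTATS is set in the
//   // environment; e.g. MALLOCSTATS=2 dumps per-size-class counts to stderr.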
3504class TCMallocGuard { 3505 public: 3506 3507 TCMallocGuard() { 3508#ifdef HAVE_TLS // this is true if the cc/ld/libc combo support TLS 3509 // Check whether the kernel also supports TLS (needs to happen at runtime) 3510 CheckIfKernelSupportsTLS(); 3511#endif 3512#ifndef WTF_CHANGES 3513#ifdef WIN32 // patch the windows VirtualAlloc, etc. 3514 PatchWindowsFunctions(); // defined in windows/patch_functions.cc 3515#endif 3516#endif 3517 free(malloc(1)); 3518 TCMalloc_ThreadCache::InitTSD(); 3519 free(malloc(1)); 3520#ifndef WTF_CHANGES 3521 MallocExtension::Register(new TCMallocImplementation); 3522#endif 3523 } 3524 3525#ifndef WTF_CHANGES 3526 ~TCMallocGuard() { 3527 const char* env = getenv("MALLOCSTATS"); 3528 if (env != NULL) { 3529 int level = atoi(env); 3530 if (level < 1) level = 1; 3531 PrintStats(level); 3532 } 3533#ifdef WIN32 3534 UnpatchWindowsFunctions(); 3535#endif 3536 } 3537#endif 3538}; 3539 3540#ifndef WTF_CHANGES 3541static TCMallocGuard module_enter_exit_hook; 3542#endif 3543 3544 3545//------------------------------------------------------------------- 3546// Helpers for the exported routines below 3547//------------------------------------------------------------------- 3548 3549#ifndef WTF_CHANGES 3550 3551static Span* DoSampledAllocation(size_t size) { 3552 3553 // Grab the stack trace outside the heap lock 3554 StackTrace tmp; 3555 tmp.depth = GetStackTrace(tmp.stack, kMaxStackDepth, 1); 3556 tmp.size = size; 3557 3558 SpinLockHolder h(&pageheap_lock); 3559 // Allocate span 3560 Span *span = pageheap->New(pages(size == 0 ? 1 : size)); 3561 if (span == NULL) { 3562 return NULL; 3563 } 3564 3565 // Allocate stack trace 3566 StackTrace *stack = stacktrace_allocator.New(); 3567 if (stack == NULL) { 3568 // Sampling failed because of lack of memory 3569 return span; 3570 } 3571 3572 *stack = tmp; 3573 span->sample = 1; 3574 span->objects = stack; 3575 DLL_Prepend(&sampled_objects, span); 3576 3577 return span; 3578} 3579#endif 3580 3581static inline bool CheckCachedSizeClass(void *ptr) { 3582 PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 3583 size_t cached_value = pageheap->GetSizeClassIfCached(p); 3584 return cached_value == 0 || 3585 cached_value == pageheap->GetDescriptor(p)->sizeclass; 3586} 3587 3588static inline void* CheckedMallocResult(void *result) 3589{ 3590 ASSERT(result == 0 || CheckCachedSizeClass(result)); 3591 return result; 3592} 3593 3594static inline void* SpanToMallocResult(Span *span) { 3595 ASSERT_SPAN_COMMITTED(span); 3596 pageheap->CacheSizeClass(span->start, 0); 3597 return 3598 CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift)); 3599} 3600 3601#ifdef WTF_CHANGES 3602template <bool crashOnFailure> 3603#endif 3604static ALWAYS_INLINE void* do_malloc(size_t size) { 3605 void* ret = NULL; 3606 3607#ifdef WTF_CHANGES 3608 ASSERT(!isForbidden()); 3609#endif 3610 3611 // The following call forces module initialization 3612 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache(); 3613#ifndef WTF_CHANGES 3614 if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { 3615 Span* span = DoSampledAllocation(size); 3616 if (span != NULL) { 3617 ret = SpanToMallocResult(span); 3618 } 3619 } else 3620#endif 3621 if (size > kMaxSize) { 3622 // Use page-level allocator 3623 SpinLockHolder h(&pageheap_lock); 3624 Span* span = pageheap->New(pages(size)); 3625 if (span != NULL) { 3626 ret = SpanToMallocResult(span); 3627 } 3628 } else { 3629 // The common case, and also the simplest. 
This just pops the 3630 // size-appropriate freelist, after replenishing it if it's empty. 3631 ret = CheckedMallocResult(heap->Allocate(size)); 3632 } 3633 if (!ret) { 3634#ifdef WTF_CHANGES 3635 if (crashOnFailure) // This branch should be optimized out by the compiler. 3636 CRASH(); 3637#else 3638 errno = ENOMEM; 3639#endif 3640 } 3641 return ret; 3642} 3643 3644static ALWAYS_INLINE void do_free(void* ptr) { 3645 if (ptr == NULL) return; 3646 ASSERT(pageheap != NULL); // Should not call free() before malloc() 3647 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 3648 Span* span = NULL; 3649 size_t cl = pageheap->GetSizeClassIfCached(p); 3650 3651 if (cl == 0) { 3652 span = pageheap->GetDescriptor(p); 3653 cl = span->sizeclass; 3654 pageheap->CacheSizeClass(p, cl); 3655 } 3656 if (cl != 0) { 3657#ifndef NO_TCMALLOC_SAMPLES 3658 ASSERT(!pageheap->GetDescriptor(p)->sample); 3659#endif 3660 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent(); 3661 if (heap != NULL) { 3662 heap->Deallocate(ptr, cl); 3663 } else { 3664 // Delete directly into central cache 3665 SLL_SetNext(ptr, NULL); 3666 central_cache[cl].InsertRange(ptr, ptr, 1); 3667 } 3668 } else { 3669 SpinLockHolder h(&pageheap_lock); 3670 ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); 3671 ASSERT(span != NULL && span->start == p); 3672#ifndef NO_TCMALLOC_SAMPLES 3673 if (span->sample) { 3674 DLL_Remove(span); 3675 stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects)); 3676 span->objects = NULL; 3677 } 3678#endif 3679 pageheap->Delete(span); 3680 } 3681} 3682 3683#ifndef WTF_CHANGES 3684// For use by exported routines below that want specific alignments 3685// 3686// Note: this code can be slow, and can significantly fragment memory. 3687// The expectation is that memalign/posix_memalign/valloc/pvalloc will 3688// not be invoked very often. This requirement simplifies our 3689// implementation and allows us to tune for expected allocation 3690// patterns. 3691static void* do_memalign(size_t align, size_t size) { 3692 ASSERT((align & (align - 1)) == 0); 3693 ASSERT(align > 0); 3694 if (pageheap == NULL) TCMalloc_ThreadCache::InitModule(); 3695 3696 // Allocate at least one byte to avoid boundary conditions below 3697 if (size == 0) size = 1; 3698 3699 if (size <= kMaxSize && align < kPageSize) { 3700 // Search through acceptable size classes looking for one with 3701 // enough alignment. This depends on the fact that 3702 // InitSizeClasses() currently produces several size classes that 3703 // are aligned at powers of two. We will waste time and space if 3704 // we miss in the size class array, but that is deemed acceptable 3705 // since memalign() should be used rarely. 3706 size_t cl = SizeClass(size); 3707 while (cl < kNumClasses && ((class_to_size[cl] & (align - 1)) != 0)) { 3708 cl++; 3709 } 3710 if (cl < kNumClasses) { 3711 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache(); 3712 return CheckedMallocResult(heap->Allocate(class_to_size[cl])); 3713 } 3714 } 3715 3716 // We will allocate directly from the page heap 3717 SpinLockHolder h(&pageheap_lock); 3718 3719 if (align <= kPageSize) { 3720 // Any page-level allocation will be fine 3721 // TODO: We could put the rest of this page in the appropriate 3722 // TODO: cache but it does not seem worth it. 3723 Span* span = pageheap->New(pages(size)); 3724 return span == NULL ?
NULL : SpanToMallocResult(span); 3725 } 3726 3727 // Allocate extra pages and carve off an aligned portion 3728 const Length alloc = pages(size + align); 3729 Span* span = pageheap->New(alloc); 3730 if (span == NULL) return NULL; 3731 3732 // Skip starting portion so that we end up aligned 3733 Length skip = 0; 3734 while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) { 3735 skip++; 3736 } 3737 ASSERT(skip < alloc); 3738 if (skip > 0) { 3739 Span* rest = pageheap->Split(span, skip); 3740 pageheap->Delete(span); 3741 span = rest; 3742 } 3743 3744 // Skip trailing portion that we do not need to return 3745 const Length needed = pages(size); 3746 ASSERT(span->length >= needed); 3747 if (span->length > needed) { 3748 Span* trailer = pageheap->Split(span, needed); 3749 pageheap->Delete(trailer); 3750 } 3751 return SpanToMallocResult(span); 3752} 3753#endif 3754 3755// Helpers for use by exported routines below: 3756 3757#ifndef WTF_CHANGES 3758static inline void do_malloc_stats() { 3759 PrintStats(1); 3760} 3761#endif 3762 3763static inline int do_mallopt(int, int) { 3764 return 1; // Indicates error 3765} 3766 3767#ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance 3768static inline struct mallinfo do_mallinfo() { 3769 TCMallocStats stats; 3770 ExtractStats(&stats, NULL); 3771 3772 // Just some of the fields are filled in. 3773 struct mallinfo info; 3774 memset(&info, 0, sizeof(info)); 3775 3776 // Unfortunately, the struct contains "int" field, so some of the 3777 // size values will be truncated. 3778 info.arena = static_cast<int>(stats.system_bytes); 3779 info.fsmblks = static_cast<int>(stats.thread_bytes 3780 + stats.central_bytes 3781 + stats.transfer_bytes); 3782 info.fordblks = static_cast<int>(stats.pageheap_bytes); 3783 info.uordblks = static_cast<int>(stats.system_bytes 3784 - stats.thread_bytes 3785 - stats.central_bytes 3786 - stats.transfer_bytes 3787 - stats.pageheap_bytes); 3788 3789 return info; 3790} 3791#endif 3792 3793//------------------------------------------------------------------- 3794// Exported routines 3795//------------------------------------------------------------------- 3796 3797// CAVEAT: The code structure below ensures that MallocHook methods are always 3798// called from the stack frame of the invoked allocation function. 3799// heap-checker.cc depends on this to start a stack trace from 3800// the call to the (de)allocation function. 3801 3802#ifndef WTF_CHANGES 3803extern "C" 3804#else 3805#define do_malloc do_malloc<crashOnFailure> 3806 3807template <bool crashOnFailure> 3808ALWAYS_INLINE void* malloc(size_t); 3809 3810void* fastMalloc(size_t size) 3811{ 3812 return malloc<true>(size); 3813} 3814 3815TryMallocReturnValue tryFastMalloc(size_t size) 3816{ 3817 return malloc<false>(size); 3818} 3819 3820template <bool crashOnFailure> 3821ALWAYS_INLINE 3822#endif 3823void* malloc(size_t size) { 3824#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 3825 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= size) // If overflow would occur... 
3826 return 0; 3827 size += sizeof(AllocAlignmentInteger); 3828 void* result = do_malloc(size); 3829 if (!result) 3830 return 0; 3831 3832 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; 3833 result = static_cast<AllocAlignmentInteger*>(result) + 1; 3834#else 3835 void* result = do_malloc(size); 3836#endif 3837 3838#ifndef WTF_CHANGES 3839 MallocHook::InvokeNewHook(result, size); 3840#endif 3841 return result; 3842} 3843 3844#ifndef WTF_CHANGES 3845extern "C" 3846#endif 3847void free(void* ptr) { 3848#ifndef WTF_CHANGES 3849 MallocHook::InvokeDeleteHook(ptr); 3850#endif 3851 3852#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 3853 if (!ptr) 3854 return; 3855 3856 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(ptr); 3857 if (*header != Internal::AllocTypeMalloc) 3858 Internal::fastMallocMatchFailed(ptr); 3859 do_free(header); 3860#else 3861 do_free(ptr); 3862#endif 3863} 3864 3865#ifndef WTF_CHANGES 3866extern "C" 3867#else 3868template <bool crashOnFailure> 3869ALWAYS_INLINE void* calloc(size_t, size_t); 3870 3871void* fastCalloc(size_t n, size_t elem_size) 3872{ 3873 return calloc<true>(n, elem_size); 3874} 3875 3876TryMallocReturnValue tryFastCalloc(size_t n, size_t elem_size) 3877{ 3878 return calloc<false>(n, elem_size); 3879} 3880 3881template <bool crashOnFailure> 3882ALWAYS_INLINE 3883#endif 3884void* calloc(size_t n, size_t elem_size) { 3885 size_t totalBytes = n * elem_size; 3886 3887 // Protect against overflow 3888 if (n > 1 && elem_size && (totalBytes / elem_size) != n) 3889 return 0; 3890 3891#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 3892 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes) // If overflow would occur... 3893 return 0; 3894 3895 totalBytes += sizeof(AllocAlignmentInteger); 3896 void* result = do_malloc(totalBytes); 3897 if (!result) 3898 return 0; 3899 3900 memset(result, 0, totalBytes); 3901 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; 3902 result = static_cast<AllocAlignmentInteger*>(result) + 1; 3903#else 3904 void* result = do_malloc(totalBytes); 3905 if (result != NULL) { 3906 memset(result, 0, totalBytes); 3907 } 3908#endif 3909 3910#ifndef WTF_CHANGES 3911 MallocHook::InvokeNewHook(result, totalBytes); 3912#endif 3913 return result; 3914} 3915 3916// Since cfree isn't used anywhere, we don't compile it in. 
3917#ifndef WTF_CHANGES 3918#ifndef WTF_CHANGES 3919extern "C" 3920#endif 3921void cfree(void* ptr) { 3922#ifndef WTF_CHANGES 3923 MallocHook::InvokeDeleteHook(ptr); 3924#endif 3925 do_free(ptr); 3926} 3927#endif 3928 3929#ifndef WTF_CHANGES 3930extern "C" 3931#else 3932template <bool crashOnFailure> 3933ALWAYS_INLINE void* realloc(void*, size_t); 3934 3935void* fastRealloc(void* old_ptr, size_t new_size) 3936{ 3937 return realloc<true>(old_ptr, new_size); 3938} 3939 3940TryMallocReturnValue tryFastRealloc(void* old_ptr, size_t new_size) 3941{ 3942 return realloc<false>(old_ptr, new_size); 3943} 3944 3945template <bool crashOnFailure> 3946ALWAYS_INLINE 3947#endif 3948void* realloc(void* old_ptr, size_t new_size) { 3949 if (old_ptr == NULL) { 3950#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 3951 void* result = malloc(new_size); 3952#else 3953 void* result = do_malloc(new_size); 3954#ifndef WTF_CHANGES 3955 MallocHook::InvokeNewHook(result, new_size); 3956#endif 3957#endif 3958 return result; 3959 } 3960 if (new_size == 0) { 3961#ifndef WTF_CHANGES 3962 MallocHook::InvokeDeleteHook(old_ptr); 3963#endif 3964 free(old_ptr); 3965 return NULL; 3966 } 3967 3968#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 3969 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= new_size) // If overflow would occur... 3970 return 0; 3971 new_size += sizeof(AllocAlignmentInteger); 3972 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(old_ptr); 3973 if (*header != Internal::AllocTypeMalloc) 3974 Internal::fastMallocMatchFailed(old_ptr); 3975 old_ptr = header; 3976#endif 3977 3978 // Get the size of the old entry 3979 const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift; 3980 size_t cl = pageheap->GetSizeClassIfCached(p); 3981 Span *span = NULL; 3982 size_t old_size; 3983 if (cl == 0) { 3984 span = pageheap->GetDescriptor(p); 3985 cl = span->sizeclass; 3986 pageheap->CacheSizeClass(p, cl); 3987 } 3988 if (cl != 0) { 3989 old_size = ByteSizeForClass(cl); 3990 } else { 3991 ASSERT(span != NULL); 3992 old_size = span->length << kPageShift; 3993 } 3994 3995 // Reallocate if the new size is larger than the old size, 3996 // or if the new size is significantly smaller than the old size. 3997 if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) { 3998 // Need to reallocate 3999 void* new_ptr = do_malloc(new_size); 4000 if (new_ptr == NULL) { 4001 return NULL; 4002 } 4003#ifndef WTF_CHANGES 4004 MallocHook::InvokeNewHook(new_ptr, new_size); 4005#endif 4006 memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); 4007#ifndef WTF_CHANGES 4008 MallocHook::InvokeDeleteHook(old_ptr); 4009#endif 4010 // We could use a variant of do_free() that leverages the fact 4011 // that we already know the sizeclass of old_ptr. The benefit 4012 // would be small, so don't bother. 4013 do_free(old_ptr); 4014#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 4015 new_ptr = static_cast<AllocAlignmentInteger*>(new_ptr) + 1; 4016#endif 4017 return new_ptr; 4018 } else { 4019#if ENABLE(FAST_MALLOC_MATCH_VALIDATION) 4020 old_ptr = static_cast<AllocAlignmentInteger*>(old_ptr) + 1; // Set old_ptr back to the user pointer. 
4021#endif 4022 return old_ptr; 4023 } 4024} 4025 4026#ifdef WTF_CHANGES 4027#undef do_malloc 4028#else 4029 4030static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER; 4031 4032static inline void* cpp_alloc(size_t size, bool nothrow) { 4033 for (;;) { 4034 void* p = do_malloc(size); 4035#ifdef PREANSINEW 4036 return p; 4037#else 4038 if (p == NULL) { // allocation failed 4039 // Get the current new handler. NB: this function is not 4040 // thread-safe. We make a feeble stab at making it so here, but 4041 // this lock only protects against tcmalloc interfering with 4042 // itself, not with other libraries calling set_new_handler. 4043 std::new_handler nh; 4044 { 4045 SpinLockHolder h(&set_new_handler_lock); 4046 nh = std::set_new_handler(0); 4047 (void) std::set_new_handler(nh); 4048 } 4049 // If no new_handler is established, the allocation failed. 4050 if (!nh) { 4051 if (nothrow) return 0; 4052 throw std::bad_alloc(); 4053 } 4054 // Otherwise, try the new_handler. If it returns, retry the 4055 // allocation. If it throws std::bad_alloc, fail the allocation. 4056 // if it throws something else, don't interfere. 4057 try { 4058 (*nh)(); 4059 } catch (const std::bad_alloc&) { 4060 if (!nothrow) throw; 4061 return p; 4062 } 4063 } else { // allocation success 4064 return p; 4065 } 4066#endif 4067 } 4068} 4069 4070#if ENABLE(GLOBAL_FASTMALLOC_NEW) 4071 4072void* operator new(size_t size) { 4073 void* p = cpp_alloc(size, false); 4074 // We keep this next instruction out of cpp_alloc for a reason: when 4075 // it's in, and new just calls cpp_alloc, the optimizer may fold the 4076 // new call into cpp_alloc, which messes up our whole section-based 4077 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc 4078 // isn't the last thing this fn calls, and prevents the folding. 4079 MallocHook::InvokeNewHook(p, size); 4080 return p; 4081} 4082 4083void* operator new(size_t size, const std::nothrow_t&) __THROW { 4084 void* p = cpp_alloc(size, true); 4085 MallocHook::InvokeNewHook(p, size); 4086 return p; 4087} 4088 4089void operator delete(void* p) __THROW { 4090 MallocHook::InvokeDeleteHook(p); 4091 do_free(p); 4092} 4093 4094void operator delete(void* p, const std::nothrow_t&) __THROW { 4095 MallocHook::InvokeDeleteHook(p); 4096 do_free(p); 4097} 4098 4099void* operator new[](size_t size) { 4100 void* p = cpp_alloc(size, false); 4101 // We keep this next instruction out of cpp_alloc for a reason: when 4102 // it's in, and new just calls cpp_alloc, the optimizer may fold the 4103 // new call into cpp_alloc, which messes up our whole section-based 4104 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc 4105 // isn't the last thing this fn calls, and prevents the folding. 
4106 MallocHook::InvokeNewHook(p, size); 4107 return p; 4108} 4109 4110void* operator new[](size_t size, const std::nothrow_t&) __THROW { 4111 void* p = cpp_alloc(size, true); 4112 MallocHook::InvokeNewHook(p, size); 4113 return p; 4114} 4115 4116void operator delete[](void* p) __THROW { 4117 MallocHook::InvokeDeleteHook(p); 4118 do_free(p); 4119} 4120 4121void operator delete[](void* p, const std::nothrow_t&) __THROW { 4122 MallocHook::InvokeDeleteHook(p); 4123 do_free(p); 4124} 4125 4126#endif 4127 4128extern "C" void* memalign(size_t align, size_t size) __THROW { 4129 void* result = do_memalign(align, size); 4130 MallocHook::InvokeNewHook(result, size); 4131 return result; 4132} 4133 4134extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size) 4135 __THROW { 4136 if (((align % sizeof(void*)) != 0) || 4137 ((align & (align - 1)) != 0) || 4138 (align == 0)) { 4139 return EINVAL; 4140 } 4141 4142 void* result = do_memalign(align, size); 4143 MallocHook::InvokeNewHook(result, size); 4144 if (result == NULL) { 4145 return ENOMEM; 4146 } else { 4147 *result_ptr = result; 4148 return 0; 4149 } 4150} 4151 4152static size_t pagesize = 0; 4153 4154extern "C" void* valloc(size_t size) __THROW { 4155 // Allocate page-aligned object of length >= size bytes 4156 if (pagesize == 0) pagesize = getpagesize(); 4157 void* result = do_memalign(pagesize, size); 4158 MallocHook::InvokeNewHook(result, size); 4159 return result; 4160} 4161 4162extern "C" void* pvalloc(size_t size) __THROW { 4163 // Round up size to a multiple of pagesize 4164 if (pagesize == 0) pagesize = getpagesize(); 4165 size = (size + pagesize - 1) & ~(pagesize - 1); 4166 void* result = do_memalign(pagesize, size); 4167 MallocHook::InvokeNewHook(result, size); 4168 return result; 4169} 4170 4171extern "C" void malloc_stats(void) { 4172 do_malloc_stats(); 4173} 4174 4175extern "C" int mallopt(int cmd, int value) { 4176 return do_mallopt(cmd, value); 4177} 4178 4179#ifdef HAVE_STRUCT_MALLINFO 4180extern "C" struct mallinfo mallinfo(void) { 4181 return do_mallinfo(); 4182} 4183#endif 4184 4185//------------------------------------------------------------------- 4186// Some library routines on RedHat 9 allocate memory using malloc() 4187// and free it using __libc_free() (or vice-versa). Since we provide 4188// our own implementations of malloc/free, we need to make sure that 4189// the __libc_XXX variants (defined as part of glibc) also point to 4190// the same implementations. 4191//------------------------------------------------------------------- 4192 4193#if defined(__GLIBC__) 4194extern "C" { 4195#if COMPILER(GCC) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__) 4196 // Potentially faster variants that use the gcc alias extension. 4197 // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check. 
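// As an illustration, the ALIAS macro defined next turns a declaration such as
//   void* __libc_malloc(size_t size) ALIAS("malloc");
// into
//   void* __libc_malloc(size_t size) __attribute__ ((weak, alias ("malloc")));
// so the glibc-internal entry points below become weak aliases that resolve to
// the malloc/free/realloc/... definitions in this file.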
4198# define ALIAS(x) __attribute__ ((weak, alias (x))) 4199 void* __libc_malloc(size_t size) ALIAS("malloc"); 4200 void __libc_free(void* ptr) ALIAS("free"); 4201 void* __libc_realloc(void* ptr, size_t size) ALIAS("realloc"); 4202 void* __libc_calloc(size_t n, size_t size) ALIAS("calloc"); 4203 void __libc_cfree(void* ptr) ALIAS("cfree"); 4204 void* __libc_memalign(size_t align, size_t s) ALIAS("memalign"); 4205 void* __libc_valloc(size_t size) ALIAS("valloc"); 4206 void* __libc_pvalloc(size_t size) ALIAS("pvalloc"); 4207 int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign"); 4208# undef ALIAS 4209# else /* not __GNUC__ */ 4210 // Portable wrappers 4211 void* __libc_malloc(size_t size) { return malloc(size); } 4212 void __libc_free(void* ptr) { free(ptr); } 4213 void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); } 4214 void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); } 4215 void __libc_cfree(void* ptr) { cfree(ptr); } 4216 void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); } 4217 void* __libc_valloc(size_t size) { return valloc(size); } 4218 void* __libc_pvalloc(size_t size) { return pvalloc(size); } 4219 int __posix_memalign(void** r, size_t a, size_t s) { 4220 return posix_memalign(r, a, s); 4221 } 4222# endif /* __GNUC__ */ 4223} 4224#endif /* __GLIBC__ */ 4225 4226// Override __libc_memalign in libc on linux boxes specially. 4227// They have a bug in libc that causes them to (very rarely) allocate 4228// with __libc_memalign() yet deallocate with free() and the 4229// definitions above don't catch it. 4230// This function is an exception to the rule of calling MallocHook method 4231// from the stack frame of the allocation function; 4232// heap-checker handles this special case explicitly. 4233static void *MemalignOverride(size_t align, size_t size, const void *caller) 4234 __THROW { 4235 void* result = do_memalign(align, size); 4236 MallocHook::InvokeNewHook(result, size); 4237 return result; 4238} 4239void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; 4240 4241#endif 4242 4243#ifdef WTF_CHANGES 4244void releaseFastMallocFreeMemory() 4245{ 4246 // Flush free pages in the current thread cache back to the page heap. 4247 // Low watermark mechanism in Scavenge() prevents full return on the first pass. 4248 // The second pass flushes everything. 
4249 if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) { 4250 threadCache->Scavenge(); 4251 threadCache->Scavenge(); 4252 } 4253 4254 SpinLockHolder h(&pageheap_lock); 4255 pageheap->ReleaseFreePages(); 4256} 4257 4258FastMallocStatistics fastMallocStatistics() 4259{ 4260 FastMallocStatistics statistics; 4261 4262 SpinLockHolder lockHolder(&pageheap_lock); 4263 statistics.reservedVMBytes = static_cast<size_t>(pageheap->SystemBytes()); 4264 statistics.committedVMBytes = statistics.reservedVMBytes - pageheap->ReturnedBytes(); 4265 4266 statistics.freeListBytes = 0; 4267 for (unsigned cl = 0; cl < kNumClasses; ++cl) { 4268 const int length = central_cache[cl].length(); 4269 const int tc_length = central_cache[cl].tc_length(); 4270 4271 statistics.freeListBytes += ByteSizeForClass(cl) * (length + tc_length); 4272 } 4273 for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_) 4274 statistics.freeListBytes += threadCache->Size(); 4275 4276 return statistics; 4277} 4278 4279size_t fastMallocSize(const void* ptr) 4280{ 4281 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; 4282 Span* span = pageheap->GetDescriptorEnsureSafe(p); 4283 4284 if (!span || span->free) 4285 return 0; 4286 4287 for (void* free = span->objects; free != NULL; free = *((void**) free)) { 4288 if (ptr == free) 4289 return 0; 4290 } 4291 4292 if (size_t cl = span->sizeclass) 4293 return ByteSizeForClass(cl); 4294 4295 return span->length << kPageShift; 4296} 4297 4298#if OS(DARWIN) 4299 4300class FreeObjectFinder { 4301 const RemoteMemoryReader& m_reader; 4302 HashSet<void*> m_freeObjects; 4303 4304public: 4305 FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { } 4306 4307 void visit(void* ptr) { m_freeObjects.add(ptr); } 4308 bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); } 4309 bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); } 4310 size_t freeObjectCount() const { return m_freeObjects.size(); } 4311 4312 void findFreeObjects(TCMalloc_ThreadCache* threadCache) 4313 { 4314 for (; threadCache; threadCache = (threadCache->next_ ? 
m_reader(threadCache->next_) : 0)) 4315 threadCache->enumerateFreeObjects(*this, m_reader); 4316 } 4317 4318 void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList) 4319 { 4320 for (unsigned i = 0; i < numSizes; i++) 4321 centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i); 4322 } 4323}; 4324 4325class PageMapFreeObjectFinder { 4326 const RemoteMemoryReader& m_reader; 4327 FreeObjectFinder& m_freeObjectFinder; 4328 4329public: 4330 PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder) 4331 : m_reader(reader) 4332 , m_freeObjectFinder(freeObjectFinder) 4333 { } 4334 4335 int visit(void* ptr) const 4336 { 4337 if (!ptr) 4338 return 1; 4339 4340 Span* span = m_reader(reinterpret_cast<Span*>(ptr)); 4341 if (!span) 4342 return 1; 4343 4344 if (span->free) { 4345 void* ptr = reinterpret_cast<void*>(span->start << kPageShift); 4346 m_freeObjectFinder.visit(ptr); 4347 } else if (span->sizeclass) { 4348 // Walk the free list of the small-object span, keeping track of each object seen 4349 for (void* nextObject = span->objects; nextObject; nextObject = m_reader.nextEntryInLinkedList(reinterpret_cast<void**>(nextObject))) 4350 m_freeObjectFinder.visit(nextObject); 4351 } 4352 return span->length; 4353 } 4354}; 4355 4356class PageMapMemoryUsageRecorder { 4357 task_t m_task; 4358 void* m_context; 4359 unsigned m_typeMask; 4360 vm_range_recorder_t* m_recorder; 4361 const RemoteMemoryReader& m_reader; 4362 const FreeObjectFinder& m_freeObjectFinder; 4363 4364 HashSet<void*> m_seenPointers; 4365 Vector<Span*> m_coalescedSpans; 4366 4367public: 4368 PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder) 4369 : m_task(task) 4370 , m_context(context) 4371 , m_typeMask(typeMask) 4372 , m_recorder(recorder) 4373 , m_reader(reader) 4374 , m_freeObjectFinder(freeObjectFinder) 4375 { } 4376 4377 ~PageMapMemoryUsageRecorder() 4378 { 4379 ASSERT(!m_coalescedSpans.size()); 4380 } 4381 4382 void recordPendingRegions() 4383 { 4384 Span* lastSpan = m_coalescedSpans[m_coalescedSpans.size() - 1]; 4385 vm_range_t ptrRange = { m_coalescedSpans[0]->start << kPageShift, 0 }; 4386 ptrRange.size = (lastSpan->start << kPageShift) - ptrRange.address + (lastSpan->length * kPageSize); 4387 4388 // Mark the memory region the spans represent as a candidate for containing pointers 4389 if (m_typeMask & MALLOC_PTR_REGION_RANGE_TYPE) 4390 (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1); 4391 4392 if (!(m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) { 4393 m_coalescedSpans.clear(); 4394 return; 4395 } 4396 4397 Vector<vm_range_t, 1024> allocatedPointers; 4398 for (size_t i = 0; i < m_coalescedSpans.size(); ++i) { 4399 Span *theSpan = m_coalescedSpans[i]; 4400 if (theSpan->free) 4401 continue; 4402 4403 vm_address_t spanStartAddress = theSpan->start << kPageShift; 4404 vm_size_t spanSizeInBytes = theSpan->length * kPageSize; 4405 4406 if (!theSpan->sizeclass) { 4407 // If it's an allocated large object span, mark it as in use 4408 if (!m_freeObjectFinder.isFreeObject(spanStartAddress)) 4409 allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes}); 4410 } else { 4411 const size_t objectSize = ByteSizeForClass(theSpan->sizeclass); 4412 4413 // Mark each allocated small object within the span as in use 4414 const 
vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes; 4415 for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) { 4416 if (!m_freeObjectFinder.isFreeObject(object)) 4417 allocatedPointers.append((vm_range_t){object, objectSize}); 4418 } 4419 } 4420 } 4421 4422 (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, allocatedPointers.data(), allocatedPointers.size()); 4423 4424 m_coalescedSpans.clear(); 4425 } 4426 4427 int visit(void* ptr) 4428 { 4429 if (!ptr) 4430 return 1; 4431 4432 Span* span = m_reader(reinterpret_cast<Span*>(ptr)); 4433 if (!span || !span->start) 4434 return 1; 4435 4436 if (m_seenPointers.contains(ptr)) 4437 return span->length; 4438 m_seenPointers.add(ptr); 4439 4440 if (!m_coalescedSpans.size()) { 4441 m_coalescedSpans.append(span); 4442 return span->length; 4443 } 4444 4445 Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1]; 4446 vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift; 4447 vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize; 4448 4449 // If the new span is adjacent to the previous span, do nothing for now. 4450 vm_address_t spanStartAddress = span->start << kPageShift; 4451 if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) { 4452 m_coalescedSpans.append(span); 4453 return span->length; 4454 } 4455 4456 // New span is not adjacent to previous span, so record the spans coalesced so far. 4457 recordPendingRegions(); 4458 m_coalescedSpans.append(span); 4459 4460 return span->length; 4461 } 4462}; 4463 4464class AdminRegionRecorder { 4465 task_t m_task; 4466 void* m_context; 4467 unsigned m_typeMask; 4468 vm_range_recorder_t* m_recorder; 4469 const RemoteMemoryReader& m_reader; 4470 4471 Vector<vm_range_t, 1024> m_pendingRegions; 4472 4473public: 4474 AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader) 4475 : m_task(task) 4476 , m_context(context) 4477 , m_typeMask(typeMask) 4478 , m_recorder(recorder) 4479 , m_reader(reader) 4480 { } 4481 4482 void recordRegion(vm_address_t ptr, size_t size) 4483 { 4484 if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE) 4485 m_pendingRegions.append((vm_range_t){ ptr, size }); 4486 } 4487 4488 void visit(void *ptr, size_t size) 4489 { 4490 recordRegion(reinterpret_cast<vm_address_t>(ptr), size); 4491 } 4492 4493 void recordPendingRegions() 4494 { 4495 if (m_pendingRegions.size()) { 4496 (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size()); 4497 m_pendingRegions.clear(); 4498 } 4499 } 4500 4501 ~AdminRegionRecorder() 4502 { 4503 ASSERT(!m_pendingRegions.size()); 4504 } 4505}; 4506 4507kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder) 4508{ 4509 RemoteMemoryReader memoryReader(task, reader); 4510 4511 InitSizeClasses(); 4512 4513 FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress)); 4514 TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap); 4515 TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps); 4516 TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer); 4517 4518 TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses); 4519 4520 FreeObjectFinder 
finder(memoryReader); 4521 finder.findFreeObjects(threadHeaps); 4522 finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches); 4523 4524 TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_; 4525 PageMapFreeObjectFinder pageMapFinder(memoryReader, finder); 4526 pageMap->visitValues(pageMapFinder, memoryReader); 4527 4528 PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder); 4529 pageMap->visitValues(usageRecorder, memoryReader); 4530 usageRecorder.recordPendingRegions(); 4531 4532 AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder, memoryReader); 4533 pageMap->visitAllocations(adminRegionRecorder, memoryReader); 4534 4535 PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator); 4536 PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator); 4537 4538 spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); 4539 pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); 4540 4541 adminRegionRecorder.recordPendingRegions(); 4542 4543 return 0; 4544} 4545 4546size_t FastMallocZone::size(malloc_zone_t*, const void*) 4547{ 4548 return 0; 4549} 4550 4551void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t) 4552{ 4553 return 0; 4554} 4555 4556void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t) 4557{ 4558 return 0; 4559} 4560 4561void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr) 4562{ 4563 // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer 4564 // is not in this zone. When this happens, the pointer being freed was not allocated by any 4565 // zone so we need to print a useful error for the application developer. 4566 malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr); 4567} 4568 4569void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t) 4570{ 4571 return 0; 4572} 4573 4574 4575#undef malloc 4576#undef free 4577#undef realloc 4578#undef calloc 4579 4580extern "C" { 4581malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print, 4582 &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics 4583 4584#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) 4585 , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher. 4586#endif 4587#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !defined(BUILDING_ON_SNOW_LEOPARD) 4588 , 0, 0, 0, 0 // These members will not be used unless the zone advertises itself as version seven or higher. 
4589#endif 4590 4591 }; 4592} 4593 4594FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator) 4595 : m_pageHeap(pageHeap) 4596 , m_threadHeaps(threadHeaps) 4597 , m_centralCaches(centralCaches) 4598 , m_spanAllocator(spanAllocator) 4599 , m_pageHeapAllocator(pageHeapAllocator) 4600{ 4601 memset(&m_zone, 0, sizeof(m_zone)); 4602 m_zone.version = 4; 4603 m_zone.zone_name = "JavaScriptCore FastMalloc"; 4604 m_zone.size = &FastMallocZone::size; 4605 m_zone.malloc = &FastMallocZone::zoneMalloc; 4606 m_zone.calloc = &FastMallocZone::zoneCalloc; 4607 m_zone.realloc = &FastMallocZone::zoneRealloc; 4608 m_zone.free = &FastMallocZone::zoneFree; 4609 m_zone.valloc = &FastMallocZone::zoneValloc; 4610 m_zone.destroy = &FastMallocZone::zoneDestroy; 4611 m_zone.introspect = &jscore_fastmalloc_introspection; 4612 malloc_zone_register(&m_zone); 4613} 4614 4615 4616void FastMallocZone::init() 4617{ 4618 static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator); 4619} 4620 4621#endif // OS(DARWIN) 4622 4623} // namespace WTF 4624#endif // WTF_CHANGES 4625 4626#endif // FORCE_SYSTEM_MALLOC 4627
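// Illustrative usage sketch of the exported WTF interface (assuming the
// declarations in FastMalloc.h, which also provides fastFree and the
// TryMallocReturnValue::getValue helper):
//
//   void* buffer = WTF::fastMalloc(1024);   // crashes on allocation failure
//   // ... use buffer ...
//   WTF::fastFree(buffer);
//   WTF::releaseFastMallocFreeMemory();     // return free pages to the system
//
//   void* p;
//   if (WTF::tryFastMalloc(1024).getValue(p)) {  // non-crashing variant
//       WTF::fastFree(p);
//   }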