// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>
//
// A malloc that uses a per-thread cache to satisfy small malloc requests.
// (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
//
// See doc/tcmalloc.html for a high-level
// description of how this malloc works.
//
// SYNCHRONIZATION
//  1. The thread-specific lists are accessed without acquiring any locks.
//     This is safe because each such list is only accessed by one thread.
//  2. We have a lock per central free-list, and hold it while manipulating
//     the central free list for a particular size.
//  3. The central page allocator is protected by "pageheap_lock".
//  4. The pagemap (which maps from page-number to descriptor),
//     can be read without holding any locks, and written while holding
//     the "pageheap_lock".
//  5. To improve performance, a subset of the information one can get
//     from the pagemap is cached in a data structure, pagemap_cache_,
//     that atomically reads and writes its entries.  This cache can be
//     read and written without locking.
//
//     This multi-threaded access to the pagemap is safe for fairly
//     subtle reasons.  We basically assume that when an object X is
//     allocated by thread A and deallocated by thread B, there must
//     have been appropriate synchronization in the handoff of object
//     X from thread A to thread B.  The same logic applies to pagemap_cache_.
//
// THE PAGEID-TO-SIZECLASS CACHE
// Hot PageID-to-sizeclass mappings are held by pagemap_cache_.  If this cache
// returns 0 for a particular PageID then that means "no information," not that
// the sizeclass is 0.  The cache may have stale information for pages that do
// not hold the beginning of any free()'able object.  Staleness is eliminated
// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
// do_memalign() for all other relevant pages.
//
// PAGEMAP
// -------
// Page map contains a mapping from page id to Span.
//
// If Span s occupies pages [p..q],
//      pagemap[p] == s
//      pagemap[q] == s
//      pagemap[p+1..q-1] are undefined
//      pagemap[p-1] and pagemap[q+1] are defined:
//         NULL if the corresponding page is not yet in the address space.
//         Otherwise it points to a Span.  This span may be free
//         or allocated.  If free, it is in one of pageheap's freelists.
//
// TODO: Bias reclamation to larger addresses
// TODO: implement mallinfo/mallopt
// TODO: Better testing
//
// 9/28/2003 (new page-level allocator replaces ptmalloc2):
// * malloc/free of small objects goes from ~300 ns to ~50 ns.
// * allocation of a reasonably complicated struct
//   goes from about 1100 ns to about 300 ns.

#include "config.h"
#include <gperftools/tcmalloc.h>

#include <errno.h>                      // for ENOMEM, EINVAL, errno
#ifdef HAVE_SYS_CDEFS_H
#include <sys/cdefs.h>                  // for __THROW
#endif
#if defined HAVE_STDINT_H
#include <stdint.h>
#elif defined HAVE_INTTYPES_H
#include <inttypes.h>
#else
#include <sys/types.h>
#endif
#include <stddef.h>                     // for size_t, NULL
#include <stdlib.h>                     // for getenv
#include <string.h>                     // for strcmp, memset, strlen, etc
#ifdef HAVE_UNISTD_H
#include <unistd.h>                     // for getpagesize, write, etc
#endif
#include <algorithm>                    // for max, min
#include <limits>                       // for numeric_limits
#include <new>                          // for nothrow_t (ptr only), etc
#include <vector>                       // for vector

#include <gperftools/malloc_extension.h>
#include <gperftools/malloc_hook.h>         // for MallocHook
#include "base/basictypes.h"            // for int64
#include "base/commandlineflags.h"      // for RegisterFlagValidator, etc
#include "base/dynamic_annotations.h"   // for RunningOnValgrind
#include "base/spinlock.h"              // for SpinLockHolder
#include "central_freelist.h"  // for CentralFreeListPadded
#include "common.h"            // for StackTrace, kPageShift, etc
#include "internal_logging.h"  // for ASSERT, TCMalloc_Printer, etc
#include "linked_list.h"       // for SLL_SetNext
#include "malloc_hook-inl.h"       // for MallocHook::InvokeNewHook, etc
#include "page_heap.h"         // for PageHeap, PageHeap::Stats
#include "page_heap_allocator.h"  // for PageHeapAllocator
#include "span.h"              // for Span, DLL_Prepend, etc
#include "stack_trace_table.h"  // for StackTraceTable
#include "static_vars.h"       // for Static
#include "system-alloc.h"      // for DumpSystemAllocatorStats, etc
#include "tcmalloc_guard.h"    // for TCMallocGuard
#include "thread_cache.h"      // for ThreadCache

// We only need malloc.h for struct mallinfo.
#ifdef HAVE_STRUCT_MALLINFO
// Malloc can be in several places on older versions of OS X.
# if defined(HAVE_MALLOC_H)
# include <malloc.h>
# elif defined(HAVE_SYS_MALLOC_H)
# include <sys/malloc.h>
# elif defined(HAVE_MALLOC_MALLOC_H)
# include <malloc/malloc.h>
# endif
#endif

#if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS)
# define WIN32_DO_PATCHING 1
#endif

// Some windows file somewhere (at least on cygwin) #define's small (!)
#undef small

using STL_NAMESPACE::max;
using STL_NAMESPACE::numeric_limits;
using STL_NAMESPACE::vector;

#include "libc_override.h"

// __THROW is defined in glibc (via <sys/cdefs.h>).  It means,
// counter-intuitively, "This function will never throw an exception."
// It's an optional optimization tool, but we may need to use it to
// match glibc prototypes.
#ifndef __THROW    // I guess we're not on a glibc system
# define __THROW   // __THROW is just an optimization, so ok to make it ""
#endif

using tcmalloc::AlignmentForSize;
using tcmalloc::kLog;
using tcmalloc::kCrash;
using tcmalloc::kCrashWithStats;
using tcmalloc::Log;
using tcmalloc::PageHeap;
using tcmalloc::PageHeapAllocator;
using tcmalloc::SizeMap;
using tcmalloc::Span;
using tcmalloc::StackTrace;
using tcmalloc::Static;
using tcmalloc::ThreadCache;

DECLARE_int64(tcmalloc_sample_parameter);
DECLARE_double(tcmalloc_release_rate);

// For windows, the printf we use to report large allocs is
// potentially dangerous: it could cause a malloc that would cause an
// infinite loop.  So by default we set the threshold to a huge number
// on windows, so this bad situation will never trigger.  You can
// always set TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD manually if you
// want this functionality.
#ifdef _WIN32
const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 62;
#else
const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 30;
#endif
DEFINE_int64(tcmalloc_large_alloc_report_threshold,
             EnvToInt64("TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD",
                        kDefaultLargeAllocReportThreshold),
             "Allocations larger than this value cause a stack "
             "trace to be dumped to stderr.  The threshold for "
             "dumping stack traces is increased by a factor of 1.125 "
             "every time we print a message so that the threshold "
             "automatically goes up by a factor of ~1000 every 60 "
             "messages.  This bounds the amount of extra logging "
             "generated by this flag.  Default value of this flag "
             "is very large and therefore you should see no extra "
             "logging unless the flag is overridden.  Set to 0 to "
             "disable reporting entirely.");


// We already declared these functions in tcmalloc.h, but we have to
// declare them again to give them an ATTRIBUTE_SECTION: we want to
// put all callers of MallocHook::Invoke* in this module into
// ATTRIBUTE_SECTION(google_malloc) section, so that
// MallocHook::GetCallerStackTrace can function accurately.
#ifndef _WIN32   // windows doesn't have attribute_section, so don't bother
extern "C" {
  void* tc_malloc(size_t size) __THROW
      ATTRIBUTE_SECTION(google_malloc);
  void tc_free(void* ptr) __THROW
      ATTRIBUTE_SECTION(google_malloc);
  void* tc_realloc(void* ptr, size_t size) __THROW
      ATTRIBUTE_SECTION(google_malloc);
  void* tc_calloc(size_t nmemb, size_t size) __THROW
      ATTRIBUTE_SECTION(google_malloc);
  void tc_cfree(void* ptr) __THROW
      ATTRIBUTE_SECTION(google_malloc);

  void* tc_memalign(size_t __alignment, size_t __size) __THROW
      ATTRIBUTE_SECTION(google_malloc);
  int tc_posix_memalign(void** ptr, size_t align, size_t size) __THROW
      ATTRIBUTE_SECTION(google_malloc);
  void* tc_valloc(size_t __size) __THROW
      ATTRIBUTE_SECTION(google_malloc);
  void* tc_pvalloc(size_t __size) __THROW
      ATTRIBUTE_SECTION(google_malloc);

  void tc_malloc_stats(void) __THROW
      ATTRIBUTE_SECTION(google_malloc);
  int tc_mallopt(int cmd, int value) __THROW
      ATTRIBUTE_SECTION(google_malloc);
#ifdef HAVE_STRUCT_MALLINFO
  struct mallinfo tc_mallinfo(void) __THROW
      ATTRIBUTE_SECTION(google_malloc);
#endif

  void* tc_new(size_t size)
      ATTRIBUTE_SECTION(google_malloc);
  void tc_delete(void* p) __THROW
      ATTRIBUTE_SECTION(google_malloc);
  void* tc_newarray(size_t size)
      ATTRIBUTE_SECTION(google_malloc);
  void tc_deletearray(void* p) __THROW
      ATTRIBUTE_SECTION(google_malloc);

  // And the nothrow variants of these:
  void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW
      ATTRIBUTE_SECTION(google_malloc);
  void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW
      ATTRIBUTE_SECTION(google_malloc);
  // Surprisingly, standard C++ library implementations use a
  // nothrow-delete internally.  See, eg:
  // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html
  void tc_delete_nothrow(void* ptr, const std::nothrow_t&) __THROW
      ATTRIBUTE_SECTION(google_malloc);
  void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) __THROW
      ATTRIBUTE_SECTION(google_malloc);

  // Some non-standard extensions that we support.

  // This is equivalent to
  //    OS X: malloc_size()
  //    glibc: malloc_usable_size()
  //    Windows: _msize()
  size_t tc_malloc_size(void* p) __THROW
      ATTRIBUTE_SECTION(google_malloc);
}  // extern "C"
#endif  // #ifndef _WIN32

// ----------------------- IMPLEMENTATION -------------------------------

static int tc_new_mode = 0;  // See tc_set_new_mode().

// Routines such as free() and realloc() catch some erroneous pointers
// passed to them, and invoke the below when they do.  (An erroneous pointer
// won't be caught if it's within a valid span or a stale span for which
// the pagemap cache has a non-zero sizeclass.) This is a cheap (source-editing
// required) kind of exception handling for these routines.
namespace {
void InvalidFree(void* ptr) {
  Log(kCrash, __FILE__, __LINE__, "Attempt to free invalid pointer", ptr);
}

size_t InvalidGetSizeForRealloc(const void* old_ptr) {
  Log(kCrash, __FILE__, __LINE__,
      "Attempt to realloc invalid pointer", old_ptr);
  return 0;
}

size_t InvalidGetAllocatedSize(const void* ptr) {
  Log(kCrash, __FILE__, __LINE__,
      "Attempt to get the size of an invalid pointer", ptr);
  return 0;
}
}  // unnamed namespace

// Extract interesting stats
struct TCMallocStats {
  uint64_t thread_bytes;      // Bytes in thread caches
  uint64_t central_bytes;     // Bytes in central cache
  uint64_t transfer_bytes;    // Bytes in central transfer cache
  uint64_t metadata_bytes;    // Bytes alloced for metadata
  PageHeap::Stats pageheap;   // Stats from page heap
};

// Get stats into "r".  Also get per-size-class counts if class_count != NULL
static void ExtractStats(TCMallocStats* r, uint64_t* class_count,
                         PageHeap::SmallSpanStats* small_spans,
                         PageHeap::LargeSpanStats* large_spans) {
  r->central_bytes = 0;
  r->transfer_bytes = 0;
  for (int cl = 0; cl < kNumClasses; ++cl) {
    const int length = Static::central_cache()[cl].length();
    const int tc_length = Static::central_cache()[cl].tc_length();
    const size_t cache_overhead = Static::central_cache()[cl].OverheadBytes();
    const size_t size = static_cast<uint64_t>(
        Static::sizemap()->ByteSizeForClass(cl));
    r->central_bytes += (size * length) + cache_overhead;
    r->transfer_bytes += (size * tc_length);
    if (class_count) class_count[cl] = length + tc_length;
  }

  // Add stats from per-thread heaps
  r->thread_bytes = 0;
  { // scope
    SpinLockHolder h(Static::pageheap_lock());
    ThreadCache::GetThreadStats(&r->thread_bytes, class_count);
    r->metadata_bytes = tcmalloc::metadata_system_bytes();
    r->pageheap = Static::pageheap()->stats();
    if (small_spans != NULL) {
      Static::pageheap()->GetSmallSpanStats(small_spans);
    }
    if (large_spans != NULL) {
      Static::pageheap()->GetLargeSpanStats(large_spans);
    }
  }
}

static double PagesToMiB(uint64_t pages) {
  return (pages << kPageShift) / 1048576.0;
}

// WRITE stats to "out"
static void DumpStats(TCMalloc_Printer* out, int level) {
  TCMallocStats stats;
  uint64_t class_count[kNumClasses];
  PageHeap::SmallSpanStats small;
  PageHeap::LargeSpanStats large;
  if (level >= 2) {
    ExtractStats(&stats, class_count, &small, &large);
  } else {
    ExtractStats(&stats, NULL, NULL, NULL);
  }

  static const double MiB = 1048576.0;

  const uint64_t virtual_memory_used = (stats.pageheap.system_bytes
                                        + stats.metadata_bytes);
  const uint64_t physical_memory_used = (virtual_memory_used
                                         - stats.pageheap.unmapped_bytes);
  const uint64_t bytes_in_use_by_app = (physical_memory_used
                                        - stats.metadata_bytes
                                        - stats.pageheap.free_bytes
                                        - stats.central_bytes
                                        - stats.transfer_bytes
                                        - stats.thread_bytes);

#ifdef TCMALLOC_SMALL_BUT_SLOW
  out->printf(
      "NOTE:  SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n");
#endif
  out->printf(
      "------------------------------------------------\n"
      "MALLOC:   %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n"
      "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n"
      "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n"
      "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n"
      "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in thread cache freelists\n"
      "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in malloc metadata\n"
      "MALLOC:   ------------\n"
      "MALLOC: = %12" PRIu64 " (%7.1f MiB) Actual memory used (physical + swap)\n"
      "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes released to OS (aka unmapped)\n"
      "MALLOC:   ------------\n"
      "MALLOC: = %12" PRIu64 " (%7.1f MiB) Virtual address space used\n"
      "MALLOC:\n"
      "MALLOC:   %12" PRIu64 "              Spans in use\n"
      "MALLOC:   %12" PRIu64 "              Thread heaps in use\n"
      "MALLOC:   %12" PRIu64 "              Tcmalloc page size\n"
      "------------------------------------------------\n"
      "Call ReleaseFreeMemory() to release freelist memory to the OS"
      " (via madvise()).\n"
      "Bytes released to the OS take up virtual address space"
      " but no physical memory.\n",
      bytes_in_use_by_app, bytes_in_use_by_app / MiB,
      stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB,
      stats.central_bytes, stats.central_bytes / MiB,
      stats.transfer_bytes, stats.transfer_bytes / MiB,
      stats.thread_bytes, stats.thread_bytes / MiB,
      stats.metadata_bytes, stats.metadata_bytes / MiB,
      physical_memory_used, physical_memory_used / MiB,
      stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MiB,
      virtual_memory_used, virtual_memory_used / MiB,
      uint64_t(Static::span_allocator()->inuse()),
      uint64_t(ThreadCache::HeapsInUse()),
      uint64_t(kPageSize));

  if (level >= 2) {
    out->printf("------------------------------------------------\n");
    out->printf("Size class breakdown\n");
    out->printf("------------------------------------------------\n");
    uint64_t cumulative = 0;
    for (int cl = 0; cl < kNumClasses; ++cl) {
      if (class_count[cl] > 0) {
        uint64_t class_bytes =
            class_count[cl] * Static::sizemap()->ByteSizeForClass(cl);
        cumulative += class_bytes;
        out->printf("class %3d [ %8" PRIuS " bytes ] : "
                "%8" PRIu64 " objs; %5.1f MiB; %5.1f cum MiB\n",
                cl, Static::sizemap()->ByteSizeForClass(cl),
                class_count[cl],
                class_bytes / MiB,
                cumulative / MiB);
      }
    }

    // append page heap info
    int nonempty_sizes = 0;
    for (int s = 0; s < kMaxPages; s++) {
      if (small.normal_length[s] + small.returned_length[s] > 0) {
        nonempty_sizes++;
      }
    }
    out->printf("------------------------------------------------\n");
    out->printf("PageHeap: %d sizes; %6.1f MiB free; %6.1f MiB unmapped\n",
                nonempty_sizes, stats.pageheap.free_bytes / MiB,
                stats.pageheap.unmapped_bytes / MiB);
    out->printf("------------------------------------------------\n");
    uint64_t total_normal = 0;
    uint64_t total_returned = 0;
    for (int s = 0; s < kMaxPages; s++) {
      const int n_length = small.normal_length[s];
      const int r_length = small.returned_length[s];
      if (n_length + r_length > 0) {
        uint64_t n_pages = s * n_length;
        uint64_t r_pages = s * r_length;
        total_normal += n_pages;
        total_returned += r_pages;
        out->printf("%6u pages * %6u spans ~ %6.1f MiB; %6.1f MiB cum"
                    "; unmapped: %6.1f MiB; %6.1f MiB cum\n",
                    s,
                    (n_length + r_length),
                    PagesToMiB(n_pages + r_pages),
                    PagesToMiB(total_normal + total_returned),
                    PagesToMiB(r_pages),
                    PagesToMiB(total_returned));
      }
    }

    total_normal += large.normal_pages;
    total_returned += large.returned_pages;
    out->printf(">255   large * %6u spans ~ %6.1f MiB; %6.1f MiB cum"
                "; unmapped: %6.1f MiB; %6.1f MiB cum\n",
                static_cast<unsigned int>(large.spans),
                PagesToMiB(large.normal_pages + large.returned_pages),
                PagesToMiB(total_normal + total_returned),
                PagesToMiB(large.returned_pages),
                PagesToMiB(total_returned));
  }
}

static void PrintStats(int level) {
  const int kBufferSize = 16 << 10;
  char* buffer = new char[kBufferSize];
  TCMalloc_Printer printer(buffer, kBufferSize);
  DumpStats(&printer, level);
  write(STDERR_FILENO, buffer, strlen(buffer));
  delete[] buffer;
}

static void** DumpHeapGrowthStackTraces() {
  // Count how much space we need
  int needed_slots = 0;
  {
    SpinLockHolder h(Static::pageheap_lock());
    for (StackTrace* t = Static::growth_stacks();
         t != NULL;
         t = reinterpret_cast<StackTrace*>(
             t->stack[tcmalloc::kMaxStackDepth-1])) {
      needed_slots += 3 + t->depth;
    }
    needed_slots += 100;            // Slop in case list grows
    needed_slots += needed_slots/8; // An extra 12.5% slop
  }

  void** result = new void*[needed_slots];
  if (result == NULL) {
    Log(kLog, __FILE__, __LINE__,
        "tcmalloc: allocation failed for stack trace slots",
        needed_slots * sizeof(*result));
    return NULL;
  }

  SpinLockHolder h(Static::pageheap_lock());
  int used_slots = 0;
  for (StackTrace* t = Static::growth_stacks();
       t != NULL;
       t = reinterpret_cast<StackTrace*>(
           t->stack[tcmalloc::kMaxStackDepth-1])) {
    ASSERT(used_slots < needed_slots);  // Need to leave room for terminator
    if (used_slots + 3 + t->depth >= needed_slots) {
      // No more room
      break;
    }

    result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
    result[used_slots+1] = reinterpret_cast<void*>(t->size);
    result[used_slots+2] = reinterpret_cast<void*>(t->depth);
    for (int d = 0; d < t->depth; d++) {
      result[used_slots+3+d] = t->stack[d];
    }
    used_slots += 3 + t->depth;
  }
  result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
  return result;
}
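
// Sketch of how a caller could walk the array produced above (illustrative
// only; the real consumers live behind MallocExtension).  Each entry is a
// (count, size, depth) header followed by "depth" program counters, and the
// list is terminated by a NULL slot:
//
//   void** entries = DumpHeapGrowthStackTraces();
//   for (uintptr_t i = 0; entries[i] != NULL; ) {
//     uintptr_t size  = reinterpret_cast<uintptr_t>(entries[i + 1]);
//     uintptr_t depth = reinterpret_cast<uintptr_t>(entries[i + 2]);
//     void** stack = &entries[i + 3];   // "depth" PCs for this growth event
//     // ... record (size, depth, stack) somewhere ...
//     i += 3 + depth;
//   }
//   delete[] entries;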

static void IterateOverRanges(void* arg, MallocExtension::RangeFunction func) {
  PageID page = 1;  // Some code may assume that page==0 is never used
  bool done = false;
  while (!done) {
    // Accumulate a small number of ranges in a local buffer
    static const int kNumRanges = 16;
    static base::MallocRange ranges[kNumRanges];
    int n = 0;
    {
      SpinLockHolder h(Static::pageheap_lock());
      while (n < kNumRanges) {
        if (!Static::pageheap()->GetNextRange(page, &ranges[n])) {
          done = true;
          break;
        } else {
          uintptr_t limit = ranges[n].address + ranges[n].length;
          page = (limit + kPageSize - 1) >> kPageShift;
          n++;
        }
      }
    }

    for (int i = 0; i < n; i++) {
      (*func)(arg, &ranges[i]);
    }
  }
}
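
// Example callback for the Ranges() interface this feeds (illustrative only;
// any function matching MallocExtension::RangeFunction works).  This one
// totals the bytes in ranges that are currently in use:
//
//   static void CountInUse(void* arg, const base::MallocRange* r) {
//     if (r->type == base::MallocRange::INUSE) {
//       *static_cast<uint64_t*>(arg) += r->length;
//     }
//   }
//   ...
//   uint64_t in_use = 0;
//   MallocExtension::instance()->Ranges(&in_use, CountInUse);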

// TCMalloc's support for extra malloc interfaces
class TCMallocImplementation : public MallocExtension {
 private:
  // ReleaseToSystem() might release more than the requested bytes because
  // the page heap releases at the span granularity, and spans are of wildly
  // different sizes.  This member keeps track of the extra bytes
  // released so that the app can periodically call ReleaseToSystem() to
  // release memory at a constant rate.
  // NOTE: Protected by Static::pageheap_lock().
  size_t extra_bytes_released_;

 public:
  TCMallocImplementation()
      : extra_bytes_released_(0) {
  }

  virtual void GetStats(char* buffer, int buffer_length) {
    ASSERT(buffer_length > 0);
    TCMalloc_Printer printer(buffer, buffer_length);

    // Print level one stats unless lots of space is available
    if (buffer_length < 10000) {
      DumpStats(&printer, 1);
    } else {
      DumpStats(&printer, 2);
    }
  }

  // We may print an extra, tcmalloc-specific warning message here.
  virtual void GetHeapSample(MallocExtensionWriter* writer) {
    if (FLAGS_tcmalloc_sample_parameter == 0) {
      const char* const kWarningMsg =
          "%warn\n"
          "%warn This heap profile does not have any data in it, because\n"
          "%warn the application was run with heap sampling turned off.\n"
          "%warn To get useful data from GetHeapSample(), you must\n"
          "%warn set the environment variable TCMALLOC_SAMPLE_PARAMETER to\n"
          "%warn a positive sampling period, such as 524288.\n"
          "%warn\n";
      writer->append(kWarningMsg, strlen(kWarningMsg));
    }
    MallocExtension::GetHeapSample(writer);
  }

  virtual void** ReadStackTraces(int* sample_period) {
    tcmalloc::StackTraceTable table;
    {
      SpinLockHolder h(Static::pageheap_lock());
      Span* sampled = Static::sampled_objects();
      for (Span* s = sampled->next; s != sampled; s = s->next) {
        table.AddTrace(*reinterpret_cast<StackTrace*>(s->objects));
      }
    }
    *sample_period = ThreadCache::GetCache()->GetSamplePeriod();
    return table.ReadStackTracesAndClear(); // grabs and releases pageheap_lock
  }

  virtual void** ReadHeapGrowthStackTraces() {
    return DumpHeapGrowthStackTraces();
  }

  virtual void Ranges(void* arg, RangeFunction func) {
    IterateOverRanges(arg, func);
  }

  virtual bool GetNumericProperty(const char* name, size_t* value) {
    ASSERT(name != NULL);

    if (strcmp(name, "generic.current_allocated_bytes") == 0) {
      TCMallocStats stats;
      ExtractStats(&stats, NULL, NULL, NULL);
      *value = stats.pageheap.system_bytes
               - stats.thread_bytes
               - stats.central_bytes
               - stats.transfer_bytes
               - stats.pageheap.free_bytes
               - stats.pageheap.unmapped_bytes;
      return true;
    }

    if (strcmp(name, "generic.heap_size") == 0) {
      TCMallocStats stats;
      ExtractStats(&stats, NULL, NULL, NULL);
      *value = stats.pageheap.system_bytes;
      return true;
    }

    if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
      // Kept for backwards compatibility.  Now defined externally as:
      //    pageheap_free_bytes + pageheap_unmapped_bytes.
      SpinLockHolder l(Static::pageheap_lock());
      PageHeap::Stats stats = Static::pageheap()->stats();
      *value = stats.free_bytes + stats.unmapped_bytes;
      return true;
    }

    if (strcmp(name, "tcmalloc.central_cache_free_bytes") == 0) {
      TCMallocStats stats;
      ExtractStats(&stats, NULL, NULL, NULL);
      *value = stats.central_bytes;
      return true;
    }

    if (strcmp(name, "tcmalloc.transfer_cache_free_bytes") == 0) {
      TCMallocStats stats;
      ExtractStats(&stats, NULL, NULL, NULL);
      *value = stats.transfer_bytes;
      return true;
    }

    if (strcmp(name, "tcmalloc.thread_cache_free_bytes") == 0) {
      TCMallocStats stats;
      ExtractStats(&stats, NULL, NULL, NULL);
      *value = stats.thread_bytes;
      return true;
    }

    if (strcmp(name, "tcmalloc.pageheap_free_bytes") == 0) {
      SpinLockHolder l(Static::pageheap_lock());
      *value = Static::pageheap()->stats().free_bytes;
      return true;
    }

    if (strcmp(name, "tcmalloc.pageheap_unmapped_bytes") == 0) {
      SpinLockHolder l(Static::pageheap_lock());
      *value = Static::pageheap()->stats().unmapped_bytes;
      return true;
    }

    if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
      SpinLockHolder l(Static::pageheap_lock());
      *value = ThreadCache::overall_thread_cache_size();
      return true;
    }

    if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) {
      TCMallocStats stats;
      ExtractStats(&stats, NULL, NULL, NULL);
      *value = stats.thread_bytes;
      return true;
    }

    return false;
  }

  virtual bool SetNumericProperty(const char* name, size_t value) {
    ASSERT(name != NULL);

    if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
      SpinLockHolder l(Static::pageheap_lock());
      ThreadCache::set_overall_thread_cache_size(value);
      return true;
    }

    return false;
  }
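
  // Typical use of the numeric-property interface above from application
  // code (illustrative only):
  //
  //   size_t heap_size = 0;
  //   MallocExtension::instance()->GetNumericProperty("generic.heap_size",
  //                                                   &heap_size);
  //   // Cap the combined thread-cache size at 32 MiB:
  //   MallocExtension::instance()->SetNumericProperty(
  //       "tcmalloc.max_total_thread_cache_bytes", 32 << 20);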

  virtual void MarkThreadIdle() {
    ThreadCache::BecomeIdle();
  }

  virtual void MarkThreadBusy();  // Implemented below

  virtual SysAllocator* GetSystemAllocator() {
    SpinLockHolder h(Static::pageheap_lock());
    return sys_alloc;
  }

  virtual void SetSystemAllocator(SysAllocator* alloc) {
    SpinLockHolder h(Static::pageheap_lock());
    sys_alloc = alloc;
  }

  virtual void ReleaseToSystem(size_t num_bytes) {
    SpinLockHolder h(Static::pageheap_lock());
    if (num_bytes <= extra_bytes_released_) {
      // We released too much on a prior call, so don't release any
      // more this time.
      extra_bytes_released_ = extra_bytes_released_ - num_bytes;
      return;
    }
    num_bytes = num_bytes - extra_bytes_released_;
    // num_bytes might be less than one page.  If we pass zero to
    // ReleaseAtLeastNPages, it won't do anything, so we release a whole
    // page now and let extra_bytes_released_ smooth it out over time.
    Length num_pages = max<Length>(num_bytes >> kPageShift, 1);
    size_t bytes_released = Static::pageheap()->ReleaseAtLeastNPages(
        num_pages) << kPageShift;
    if (bytes_released > num_bytes) {
      extra_bytes_released_ = bytes_released - num_bytes;
    } else {
      // The PageHeap wasn't able to release num_bytes.  Don't try to
      // compensate with a big release next time.  Specifically,
      // ReleaseFreeMemory() calls ReleaseToSystem(LONG_MAX).
      extra_bytes_released_ = 0;
    }
  }
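
  // Worked example of the smoothing in ReleaseToSystem() above (illustrative
  // numbers): suppose the caller asks for 1 MiB but the smallest span the
  // page heap can release is 4 MiB.  The page heap releases 4 MiB, so
  // extra_bytes_released_ becomes 3 MiB.  The next three 1 MiB requests are
  // then satisfied entirely from that credit without touching the page heap,
  // keeping the long-run release rate close to the requested one.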

  virtual void SetMemoryReleaseRate(double rate) {
    FLAGS_tcmalloc_release_rate = rate;
  }

  virtual double GetMemoryReleaseRate() {
    return FLAGS_tcmalloc_release_rate;
  }
  virtual size_t GetEstimatedAllocatedSize(size_t size) {
    if (size <= kMaxSize) {
      const size_t cl = Static::sizemap()->SizeClass(size);
      const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl);
      return alloc_size;
    } else {
      return tcmalloc::pages(size) << kPageShift;
    }
  }

  // This just calls GetSizeWithCallback, but because that's in an
  // unnamed namespace, we need to move the definition below it in the
  // file.
  virtual size_t GetAllocatedSize(const void* ptr);

  // This duplicates some of the logic in GetSizeWithCallback, but is
  // faster.  This is important on OS X, where this function is called
  // on every allocation operation.
  virtual Ownership GetOwnership(const void* ptr) {
    const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
    // The rest of tcmalloc assumes that all allocated pointers use at
    // most kAddressBits bits.  If ptr doesn't, then it definitely
    // wasn't allocated by tcmalloc.
    if ((p >> (kAddressBits - kPageShift)) > 0) {
      return kNotOwned;
    }
    size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
    if (cl != 0) {
      return kOwned;
    }
    const Span *span = Static::pageheap()->GetDescriptor(p);
    return span ? kOwned : kNotOwned;
  }

  virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) {
    static const char* kCentralCacheType = "tcmalloc.central";
    static const char* kTransferCacheType = "tcmalloc.transfer";
    static const char* kThreadCacheType = "tcmalloc.thread";
    static const char* kPageHeapType = "tcmalloc.page";
    static const char* kPageHeapUnmappedType = "tcmalloc.page_unmapped";
    static const char* kLargeSpanType = "tcmalloc.large";
    static const char* kLargeUnmappedSpanType = "tcmalloc.large_unmapped";

    v->clear();

    // central class information
    int64 prev_class_size = 0;
    for (int cl = 1; cl < kNumClasses; ++cl) {
      size_t class_size = Static::sizemap()->ByteSizeForClass(cl);
      MallocExtension::FreeListInfo i;
      i.min_object_size = prev_class_size + 1;
      i.max_object_size = class_size;
      i.total_bytes_free =
          Static::central_cache()[cl].length() * class_size;
      i.type = kCentralCacheType;
      v->push_back(i);

      // transfer cache
      i.total_bytes_free =
          Static::central_cache()[cl].tc_length() * class_size;
      i.type = kTransferCacheType;
      v->push_back(i);

      prev_class_size = Static::sizemap()->ByteSizeForClass(cl);
    }

    // Add stats from per-thread heaps
    uint64_t class_count[kNumClasses];
    memset(class_count, 0, sizeof(class_count));
    {
      SpinLockHolder h(Static::pageheap_lock());
      uint64_t thread_bytes = 0;
      ThreadCache::GetThreadStats(&thread_bytes, class_count);
    }

    prev_class_size = 0;
    for (int cl = 1; cl < kNumClasses; ++cl) {
      MallocExtension::FreeListInfo i;
      i.min_object_size = prev_class_size + 1;
      i.max_object_size = Static::sizemap()->ByteSizeForClass(cl);
      i.total_bytes_free =
          class_count[cl] * Static::sizemap()->ByteSizeForClass(cl);
      i.type = kThreadCacheType;
      v->push_back(i);
    }

    // append page heap info
    PageHeap::SmallSpanStats small;
    PageHeap::LargeSpanStats large;
    {
      SpinLockHolder h(Static::pageheap_lock());
      Static::pageheap()->GetSmallSpanStats(&small);
      Static::pageheap()->GetLargeSpanStats(&large);
    }

    // large spans: mapped
    MallocExtension::FreeListInfo span_info;
    span_info.type = kLargeSpanType;
    span_info.max_object_size = (numeric_limits<size_t>::max)();
    span_info.min_object_size = kMaxPages << kPageShift;
    span_info.total_bytes_free = large.normal_pages << kPageShift;
    v->push_back(span_info);

    // large spans: unmapped
    span_info.type = kLargeUnmappedSpanType;
    span_info.total_bytes_free = large.returned_pages << kPageShift;
    v->push_back(span_info);

    // small spans
    for (int s = 1; s < kMaxPages; s++) {
      MallocExtension::FreeListInfo i;
      i.max_object_size = (s << kPageShift);
      i.min_object_size = ((s - 1) << kPageShift);

      i.type = kPageHeapType;
      i.total_bytes_free = (s << kPageShift) * small.normal_length[s];
      v->push_back(i);

      i.type = kPageHeapUnmappedType;
      i.total_bytes_free = (s << kPageShift) * small.returned_length[s];
      v->push_back(i);
    }
  }
};

// The constructor allocates an object to ensure that initialization
// runs before main(), and therefore we do not have a chance to become
// multi-threaded before initialization.  We also create the TSD key
// here.  Presumably by the time this constructor runs, glibc is in
// good enough shape to handle pthread_key_create().
//
// The constructor also takes the opportunity to tell STL to use
// tcmalloc.  We want to do this early, before constructors run, so
// all user STL allocations go through tcmalloc (which works really
// well for STL).
//
// The destructor prints stats when the program exits.
static int tcmallocguard_refcount = 0;  // no lock needed: runs before main()
TCMallocGuard::TCMallocGuard() {
  if (tcmallocguard_refcount++ == 0) {
#ifdef HAVE_TLS    // this is true if the cc/ld/libc combo support TLS
    // Check whether the kernel also supports TLS (needs to happen at runtime)
    tcmalloc::CheckIfKernelSupportsTLS();
#endif
    ReplaceSystemAlloc();    // defined in libc_override_*.h
    tc_free(tc_malloc(1));
    ThreadCache::InitTSD();
    tc_free(tc_malloc(1));
    // Either we, or debugallocation.cc, or valgrind will control memory
    // management.  We register our extension if we're the winner.
#ifdef TCMALLOC_USING_DEBUGALLOCATION
    // Let debugallocation register its extension.
#else
    if (RunningOnValgrind()) {
      // Let Valgrind use its own malloc (so don't register our extension).
    } else {
      MallocExtension::Register(new TCMallocImplementation);
    }
#endif
  }
}

TCMallocGuard::~TCMallocGuard() {
  if (--tcmallocguard_refcount == 0) {
    const char* env = getenv("MALLOCSTATS");
    if (env != NULL) {
      int level = atoi(env);
      if (level < 1) level = 1;
      PrintStats(level);
    }
  }
}
#ifndef WIN32_OVERRIDE_ALLOCATORS
static TCMallocGuard module_enter_exit_hook;
#endif

//-------------------------------------------------------------------
// Helpers for the exported routines below
//-------------------------------------------------------------------

static inline bool CheckCachedSizeClass(void *ptr) {
  PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p);
  return cached_value == 0 ||
      cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass;
}

static inline void* CheckedMallocResult(void *result) {
  ASSERT(result == NULL || CheckCachedSizeClass(result));
  return result;
}

static inline void* SpanToMallocResult(Span *span) {
  Static::pageheap()->CacheSizeClass(span->start, 0);
  return
      CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
}

static void* DoSampledAllocation(size_t size) {
  // Grab the stack trace outside the heap lock
  StackTrace tmp;
  tmp.depth = GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1);
  tmp.size = size;

  SpinLockHolder h(Static::pageheap_lock());
  // Allocate span
  Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size));
  if (span == NULL) {
    return NULL;
  }

  // Allocate stack trace
  StackTrace *stack = Static::stacktrace_allocator()->New();
  if (stack == NULL) {
    // Sampling failed because of lack of memory; return the allocation
    // unsampled rather than a pointer to tcmalloc metadata.
    return SpanToMallocResult(span);
  }
  *stack = tmp;
  span->sample = 1;
  span->objects = stack;
  tcmalloc::DLL_Prepend(Static::sampled_objects(), span);

  return SpanToMallocResult(span);
}

namespace {

// Copy of FLAGS_tcmalloc_large_alloc_report_threshold with
// automatic increases factored in.
static int64_t large_alloc_threshold =
  (kPageSize > FLAGS_tcmalloc_large_alloc_report_threshold
   ? kPageSize : FLAGS_tcmalloc_large_alloc_report_threshold);

static void ReportLargeAlloc(Length num_pages, void* result) {
  StackTrace stack;
  stack.depth = GetStackTrace(stack.stack, tcmalloc::kMaxStackDepth, 1);

  static const int N = 1000;
  char buffer[N];
  TCMalloc_Printer printer(buffer, N);
  printer.printf("tcmalloc: large alloc %" PRIu64 " bytes == %p @ ",
                 static_cast<uint64>(num_pages) << kPageShift,
                 result);
  for (int i = 0; i < stack.depth; i++) {
    printer.printf(" %p", stack.stack[i]);
  }
  printer.printf("\n");
  write(STDERR_FILENO, buffer, strlen(buffer));
}

inline void* cpp_alloc(size_t size, bool nothrow);
inline void* do_malloc(size_t size);

// TODO(willchan): Investigate whether or not inlining this much is harmful to
// performance.
// This is equivalent to do_malloc() except when tc_new_mode is set to true,
// in which case it will run the std::new_handler if set.
inline void* do_malloc_or_cpp_alloc(size_t size) {
  return tc_new_mode ? cpp_alloc(size, true) : do_malloc(size);
}

void* cpp_memalign(size_t align, size_t size);
void* do_memalign(size_t align, size_t size);

inline void* do_memalign_or_cpp_memalign(size_t align, size_t size) {
  return tc_new_mode ? cpp_memalign(align, size) : do_memalign(align, size);
}

// Must be called with the page lock held.
inline bool should_report_large(Length num_pages) {
  const int64 threshold = large_alloc_threshold;
  if (threshold > 0 && num_pages >= (threshold >> kPageShift)) {
    // Increase the threshold by 1/8 every time we generate a report.
    // We cap the threshold at 8GiB to avoid overflow problems.
    large_alloc_threshold = (threshold + threshold/8 < 8ll<<30
                             ? threshold + threshold/8 : 8ll<<30);
    return true;
  }
  return false;
}

// Helper for do_malloc().
inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
  void* result;
  bool report_large;

  Length num_pages = tcmalloc::pages(size);
  size = num_pages << kPageShift;

  if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
    result = DoSampledAllocation(size);

    SpinLockHolder h(Static::pageheap_lock());
    report_large = should_report_large(num_pages);
  } else {
    SpinLockHolder h(Static::pageheap_lock());
    Span* span = Static::pageheap()->New(num_pages);
    result = (span == NULL ? NULL : SpanToMallocResult(span));
    report_large = should_report_large(num_pages);
  }

  if (report_large) {
    ReportLargeAlloc(num_pages, result);
  }
  return result;
}

inline void* do_malloc(size_t size) {
  void* ret = NULL;

  // The following call forces module initialization
  ThreadCache* heap = ThreadCache::GetCache();
  if (size <= kMaxSize) {
    size_t cl = Static::sizemap()->SizeClass(size);
    size = Static::sizemap()->class_to_size(cl);

    if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
      ret = DoSampledAllocation(size);
    } else {
      // The common case, and also the simplest.  This just pops the
      // size-appropriate freelist, after replenishing it if it's empty.
      ret = CheckedMallocResult(heap->Allocate(size, cl));
    }
  } else {
    ret = do_malloc_pages(heap, size);
  }
  if (ret == NULL) errno = ENOMEM;
  return ret;
}

inline void* do_calloc(size_t n, size_t elem_size) {
  // Overflow check
  const size_t size = n * elem_size;
  if (elem_size != 0 && size / elem_size != n) return NULL;

  void* result = do_malloc_or_cpp_alloc(size);
  if (result != NULL) {
    memset(result, 0, size);
  }
  return result;
}
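
// Worked example of the overflow check above (illustrative, assuming a
// 32-bit size_t): n = 1 << 20 and elem_size = 1 << 13 give n * elem_size =
// 2^33, which wraps to 0.  Since 0 / elem_size != n, do_calloc returns NULL
// instead of handing back a buffer far smaller than the caller asked for.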

static inline ThreadCache* GetCacheIfPresent() {
  void* const p = ThreadCache::GetCacheIfPresent();
  return reinterpret_cast<ThreadCache*>(p);
}

// This lets you call back to a given function pointer if ptr is invalid.
// It is used primarily by windows code which wants a specialized callback.
inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
  if (ptr == NULL) return;
  if (Static::pageheap() == NULL) {
    // We called free() before malloc().  This can occur if the
    // (system) malloc() is called before tcmalloc is loaded, and then
    // free() is called after tcmalloc is loaded (and tc_free has
    // replaced free), but before the global constructor has run that
    // sets up the tcmalloc data structures.
    (*invalid_free_fn)(ptr);  // Decide how to handle the bad free request
    return;
  }
  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  Span* span = NULL;
  size_t cl = Static::pageheap()->GetSizeClassIfCached(p);

  if (cl == 0) {
    span = Static::pageheap()->GetDescriptor(p);
    if (!span) {
      // span can be NULL because the pointer passed in is invalid
      // (not something returned by malloc or friends), or because the
      // pointer was allocated with some other allocator besides
      // tcmalloc.  The latter can happen if tcmalloc is linked in via
      // a dynamic library, but is not listed last on the link line.
      // In that case, libraries after it on the link line will
      // allocate with libc malloc, but free with tcmalloc's free.
      (*invalid_free_fn)(ptr);  // Decide how to handle the bad free request
      return;
    }
    cl = span->sizeclass;
    Static::pageheap()->CacheSizeClass(p, cl);
  }
  if (cl != 0) {
    ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
    ThreadCache* heap = GetCacheIfPresent();
    if (heap != NULL) {
      heap->Deallocate(ptr, cl);
    } else {
      // Delete directly into central cache
      tcmalloc::SLL_SetNext(ptr, NULL);
      Static::central_cache()[cl].InsertRange(ptr, ptr, 1);
    }
  } else {
    SpinLockHolder h(Static::pageheap_lock());
    ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
    ASSERT(span != NULL && span->start == p);
    if (span->sample) {
      StackTrace* st = reinterpret_cast<StackTrace*>(span->objects);
      tcmalloc::DLL_Remove(span);
      Static::stacktrace_allocator()->Delete(st);
      span->objects = NULL;
    }
    Static::pageheap()->Delete(span);
  }
}

// The default "do_free" that uses the default callback.
inline void do_free(void* ptr) {
  return do_free_with_callback(ptr, &InvalidFree);
}

// NOTE: some logic here is duplicated in GetOwnership (above), for
// speed.  If you change this function, look at that one too.
inline size_t GetSizeWithCallback(const void* ptr,
                                  size_t (*invalid_getsize_fn)(const void*)) {
  if (ptr == NULL)
    return 0;
  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
  if (cl != 0) {
    return Static::sizemap()->ByteSizeForClass(cl);
  } else {
    const Span *span = Static::pageheap()->GetDescriptor(p);
    if (span == NULL) {  // means we do not own this memory
      return (*invalid_getsize_fn)(ptr);
    } else if (span->sizeclass != 0) {
      Static::pageheap()->CacheSizeClass(p, span->sizeclass);
      return Static::sizemap()->ByteSizeForClass(span->sizeclass);
    } else {
      return span->length << kPageShift;
    }
  }
}

// This lets you call back to a given function pointer if ptr is invalid.
// It is used primarily by windows code which wants a specialized callback.
inline void* do_realloc_with_callback(
    void* old_ptr, size_t new_size,
    void (*invalid_free_fn)(void*),
    size_t (*invalid_get_size_fn)(const void*)) {
  // Get the size of the old entry
  const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn);

  // Reallocate if the new size is larger than the old size,
  // or if the new size is significantly smaller than the old size.
  // We do hysteresis to avoid resizing ping-pongs:
  //    . If we need to grow, grow to max(new_size, old_size * 1.X)
  //    . Don't shrink unless new_size < old_size * 0.Y
  // X and Y trade-off time for wasted space.  For now we do 1.25 and 0.5.
  const int lower_bound_to_grow = old_size + old_size / 4;
  const int upper_bound_to_shrink = old_size / 2;
  if ((new_size > old_size) || (new_size < upper_bound_to_shrink)) {
    // Need to reallocate.
    void* new_ptr = NULL;

    if (new_size > old_size && new_size < lower_bound_to_grow) {
      new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow);
    }
    if (new_ptr == NULL) {
      // Either new_size is not a tiny increment, or last do_malloc failed.
      new_ptr = do_malloc_or_cpp_alloc(new_size);
    }
    if (new_ptr == NULL) {
      return NULL;
    }
    MallocHook::InvokeNewHook(new_ptr, new_size);
    memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
    MallocHook::InvokeDeleteHook(old_ptr);
    // We could use a variant of do_free() that leverages the fact
    // that we already know the sizeclass of old_ptr.  The benefit
    // would be small, so don't bother.
    do_free_with_callback(old_ptr, invalid_free_fn);
    return new_ptr;
  } else {
    // We still need to call hooks to report the updated size:
    MallocHook::InvokeDeleteHook(old_ptr);
    MallocHook::InvokeNewHook(old_ptr, new_size);
    return old_ptr;
  }
}
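
// Worked example of the hysteresis above (illustrative numbers): for
// old_size = 1000, lower_bound_to_grow is 1250 and upper_bound_to_shrink is
// 500.  A realloc to 1100 grows, but first tries to allocate 1250 so that a
// follow-up realloc to 1200 can be satisfied in place; a realloc to 600
// keeps the original block; only a realloc below 500 actually shrinks.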

inline void* do_realloc(void* old_ptr, size_t new_size) {
  return do_realloc_with_callback(old_ptr, new_size,
                                  &InvalidFree, &InvalidGetSizeForRealloc);
}

// For use by exported routines below that want specific alignments
//
// Note: this code can be slow for alignments > 16, and can
// significantly fragment memory.  The expectation is that
// memalign/posix_memalign/valloc/pvalloc will not be invoked very
// often.  This requirement simplifies our implementation and allows
// us to tune for expected allocation patterns.
void* do_memalign(size_t align, size_t size) {
  ASSERT((align & (align - 1)) == 0);
  ASSERT(align > 0);
  if (size + align < size) return NULL;         // Overflow

  // Fall back to malloc if we would already align this memory access properly.
  if (align <= AlignmentForSize(size)) {
    void* p = do_malloc(size);
    ASSERT((reinterpret_cast<uintptr_t>(p) % align) == 0);
    return p;
  }

  if (Static::pageheap() == NULL) ThreadCache::InitModule();

  // Allocate at least one byte to avoid boundary conditions below
  if (size == 0) size = 1;

  if (size <= kMaxSize && align < kPageSize) {
    // Search through acceptable size classes looking for one with
    // enough alignment.  This depends on the fact that
    // InitSizeClasses() currently produces several size classes that
    // are aligned at powers of two.  We will waste time and space if
    // we miss in the size class array, but that is deemed acceptable
    // since memalign() should be used rarely.
    int cl = Static::sizemap()->SizeClass(size);
    while (cl < kNumClasses &&
           ((Static::sizemap()->class_to_size(cl) & (align - 1)) != 0)) {
      cl++;
    }
    if (cl < kNumClasses) {
      ThreadCache* heap = ThreadCache::GetCache();
      size = Static::sizemap()->class_to_size(cl);
      return CheckedMallocResult(heap->Allocate(size, cl));
    }
  }

  // We will allocate directly from the page heap
  SpinLockHolder h(Static::pageheap_lock());

  if (align <= kPageSize) {
    // Any page-level allocation will be fine
    // TODO: We could put the rest of this page in the appropriate
    // TODO: cache but it does not seem worth it.
    Span* span = Static::pageheap()->New(tcmalloc::pages(size));
    return span == NULL ? NULL : SpanToMallocResult(span);
  }

  // Allocate extra pages and carve off an aligned portion
  const Length alloc = tcmalloc::pages(size + align);
  Span* span = Static::pageheap()->New(alloc);
  if (span == NULL) return NULL;

  // Skip starting portion so that we end up aligned
  Length skip = 0;
  while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
    skip++;
  }
  ASSERT(skip < alloc);
  if (skip > 0) {
    Span* rest = Static::pageheap()->Split(span, skip);
    Static::pageheap()->Delete(span);
    span = rest;
  }

  // Skip trailing portion that we do not need to return
  const Length needed = tcmalloc::pages(size);
  ASSERT(span->length >= needed);
  if (span->length > needed) {
    Span* trailer = Static::pageheap()->Split(span, needed);
    Static::pageheap()->Delete(trailer);
  }
  return SpanToMallocResult(span);
}
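
// Worked example of the large-alignment path above (illustrative, assuming
// 8 KiB tcmalloc pages): do_memalign(32768, 20000) allocates
// pages(20000 + 32768) = 7 pages, skips leading pages until the span start
// is 32 KiB-aligned (at most 3 pages), returns the pages beyond
// pages(20000) = 3 to the page heap, and hands back a 3-page,
// 32 KiB-aligned block.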
1332
1333// Helpers for use by exported routines below:
1334
1335inline void do_malloc_stats() {
1336  PrintStats(1);
1337}
1338
1339inline int do_mallopt(int cmd, int value) {
1340  return 1;     // Indicates error
1341}
1342
1343#ifdef HAVE_STRUCT_MALLINFO
1344inline struct mallinfo do_mallinfo() {
1345  TCMallocStats stats;
1346  ExtractStats(&stats, NULL, NULL, NULL);
1347
1348  // Just some of the fields are filled in.
1349  struct mallinfo info;
1350  memset(&info, 0, sizeof(info));
1351
1352  // Unfortunately, the struct contains "int" field, so some of the
1353  // size values will be truncated.
1354  info.arena     = static_cast<int>(stats.pageheap.system_bytes);
1355  info.fsmblks   = static_cast<int>(stats.thread_bytes
1356                                    + stats.central_bytes
1357                                    + stats.transfer_bytes);
1358  info.fordblks  = static_cast<int>(stats.pageheap.free_bytes +
1359                                    stats.pageheap.unmapped_bytes);
1360  info.uordblks  = static_cast<int>(stats.pageheap.system_bytes
1361                                    - stats.thread_bytes
1362                                    - stats.central_bytes
1363                                    - stats.transfer_bytes
1364                                    - stats.pageheap.free_bytes
1365                                    - stats.pageheap.unmapped_bytes);
1366
1367  return info;
1368}
1369#endif  // HAVE_STRUCT_MALLINFO

static SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED);

inline void* cpp_alloc(size_t size, bool nothrow) {
  for (;;) {
    void* p = do_malloc(size);
#ifdef PREANSINEW
    return p;
#else
    if (p == NULL) {  // allocation failed
      // Get the current new handler.  NB: this function is not
      // thread-safe.  We make a feeble stab at making it so here, but
      // this lock only protects against tcmalloc interfering with
      // itself, not with other libraries calling set_new_handler.
      std::new_handler nh;
      {
        SpinLockHolder h(&set_new_handler_lock);
        nh = std::set_new_handler(0);
        (void) std::set_new_handler(nh);
      }
#if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS)
      if (nh) {
        // Since exceptions are disabled, we don't really know if new_handler
        // failed.  Assume it will abort if it fails.
        (*nh)();
        continue;
      }
      return 0;
#else
      // If no new_handler is established, the allocation failed.
      if (!nh) {
        if (nothrow) return 0;
        throw std::bad_alloc();
      }
      // Otherwise, try the new_handler.  If it returns, retry the
      // allocation.  If it throws std::bad_alloc, fail the allocation.
      // If it throws something else, don't interfere.
      try {
        (*nh)();
      } catch (const std::bad_alloc&) {
        if (!nothrow) throw;
        return p;
      }
#endif  // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS)
    } else {  // allocation success
      return p;
    }
#endif  // PREANSINEW
  }
}
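
// The retry loop above implements the allocator's half of the new_handler
// protocol.  A minimal caller-side sketch (not compiled into the build): the
// application installs a handler that releases a reserve buffer, so a failed
// allocation can be retried.  The reserve, its size, and the allocation size
// are hypothetical.
#if 0
#include <new>
#include <stdlib.h>

static char* g_reserve = static_cast<char*>(malloc(1 << 20));  // emergency pool

static void ReleaseReserve() {
  if (g_reserve != NULL) {
    free(g_reserve);              // free the reserve so the retried allocation
    g_reserve = NULL;             // in cpp_alloc can succeed
  } else {
    std::set_new_handler(NULL);   // nothing left: let the next failure throw
  }
}

int main() {
  std::set_new_handler(ReleaseReserve);
  char* big = new char[64 << 20];   // on failure, cpp_alloc calls
  delete[] big;                     // ReleaseReserve and retries
  return 0;
}
#endif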

void* cpp_memalign(size_t align, size_t size) {
  for (;;) {
    void* p = do_memalign(align, size);
#ifdef PREANSINEW
    return p;
#else
    if (p == NULL) {  // allocation failed
      // Get the current new handler.  NB: this function is not
      // thread-safe.  We make a feeble stab at making it so here, but
      // this lock only protects against tcmalloc interfering with
      // itself, not with other libraries calling set_new_handler.
      std::new_handler nh;
      {
        SpinLockHolder h(&set_new_handler_lock);
        nh = std::set_new_handler(0);
        (void) std::set_new_handler(nh);
      }
#if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS)
      if (nh) {
        // Since exceptions are disabled, we don't really know if new_handler
        // failed.  Assume it will abort if it fails.
        (*nh)();
        continue;
      }
      return 0;
#else
      // If no new_handler is established, the allocation failed.
      if (!nh)
        return 0;

      // Otherwise, try the new_handler.  If it returns, retry the
      // allocation.  If it throws std::bad_alloc, fail the allocation.
      // If it throws something else, don't interfere.
      try {
        (*nh)();
      } catch (const std::bad_alloc&) {
        return p;
      }
#endif  // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS)
    } else {  // allocation success
      return p;
    }
#endif  // PREANSINEW
  }
}

}  // end unnamed namespace

// As promised, the definition of this function, declared above.
size_t TCMallocImplementation::GetAllocatedSize(const void* ptr) {
  ASSERT(TCMallocImplementation::GetOwnership(ptr)
         != TCMallocImplementation::kNotOwned);
  return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize);
}

void TCMallocImplementation::MarkThreadBusy() {
  // Allocate to force the creation of a thread cache, but avoid
  // invoking any hooks.
  do_free(do_malloc(0));
}
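
// A minimal sketch (not compiled into the build) of how a long-lived worker
// thread might pair MarkThreadBusy() with MarkThreadIdle() around an idle
// period so its thread cache is released while it waits.  WaitForWork() and
// the header path are assumptions, not part of this file.
#if 0
#include <google/malloc_extension.h>

extern void WaitForWork();   // hypothetical: blocks until work arrives

static void WorkerLoop() {
  for (;;) {
    MallocExtension::instance()->MarkThreadIdle();  // give back the thread cache
    WaitForWork();                                  // potentially long block
    MallocExtension::instance()->MarkThreadBusy();  // recreate the cache (above)
    // ... allocate and process the work ...
  }
}
#endif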

//-------------------------------------------------------------------
// Exported routines
//-------------------------------------------------------------------

extern "C" PERFTOOLS_DLL_DECL const char* tc_version(
    int* major, int* minor, const char** patch) __THROW {
  if (major) *major = TC_VERSION_MAJOR;
  if (minor) *minor = TC_VERSION_MINOR;
  if (patch) *patch = TC_VERSION_PATCH;
  return TC_VERSION_STRING;
}
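
// A minimal usage sketch (not compiled into the build) for tc_version().  Any
// of the out-parameters may be NULL; the header path shown is an assumption
// and may vary by install.
#if 0
#include <stdio.h>
#include <google/tcmalloc.h>   // declares tc_version()

static void PrintTcmallocVersion() {
  int major, minor;
  const char* patch;
  const char* full = tc_version(&major, &minor, &patch);
  printf("tcmalloc %s (major=%d minor=%d patch=%s)\n", full, major, minor, patch);
}
#endif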

// This function behaves similarly to MSVC's _set_new_mode.
// If flag is 0 (default), calls to malloc will behave normally.
// If flag is 1, calls to malloc will behave like calls to new,
// and the installed std::new_handler will be invoked on failure.
// Returns the previous mode.
extern "C" PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW {
  int old_mode = tc_new_mode;
  tc_new_mode = flag;
  return old_mode;
}
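
// A minimal usage sketch (not compiled into the build): opt plain malloc()
// into the new_handler retry behavior described above, then restore the old
// mode.  The allocation size is arbitrary.
#if 0
#include <stdlib.h>

static void NewModeSketch() {
  const int old_mode = tc_set_new_mode(1);   // malloc failures now consult new_handler
  void* p = malloc(1024);                    // retried (or failed) per the handler
  free(p);
  tc_set_new_mode(old_mode);                 // restore the previous behavior
}
#endif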

#ifndef TCMALLOC_USING_DEBUGALLOCATION  // debugallocation.cc defines its own

// CAVEAT: The code structure below ensures that MallocHook methods are always
//         called from the stack frame of the invoked allocation function.
//         heap-checker.cc depends on this to start a stack trace from
//         the call to the (de)allocation function.

extern "C" PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) __THROW {
  void* result = do_malloc_or_cpp_alloc(size);
  MallocHook::InvokeNewHook(result, size);
  return result;
}

extern "C" PERFTOOLS_DLL_DECL void tc_free(void* ptr) __THROW {
  MallocHook::InvokeDeleteHook(ptr);
  do_free(ptr);
}

extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t n,
                                              size_t elem_size) __THROW {
  void* result = do_calloc(n, elem_size);
  MallocHook::InvokeNewHook(result, n * elem_size);
  return result;
}

extern "C" PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) __THROW {
  MallocHook::InvokeDeleteHook(ptr);
  do_free(ptr);
}

extern "C" PERFTOOLS_DLL_DECL void* tc_realloc(void* old_ptr,
                                               size_t new_size) __THROW {
  if (old_ptr == NULL) {
    void* result = do_malloc_or_cpp_alloc(new_size);
    MallocHook::InvokeNewHook(result, new_size);
    return result;
  }
  if (new_size == 0) {
    MallocHook::InvokeDeleteHook(old_ptr);
    do_free(old_ptr);
    return NULL;
  }
  return do_realloc(old_ptr, new_size);
}
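
// A minimal sketch (not compiled into the build) of the two edge cases handled
// above: realloc(NULL, n) behaves like malloc(n), and realloc(p, 0) frees p
// and returns NULL.
#if 0
#include <stdlib.h>

static void ReallocEdgeCases() {
  char* p = static_cast<char*>(realloc(NULL, 64));  // same as malloc(64)
  p = static_cast<char*>(realloc(p, 128));          // ordinary grow
  p = static_cast<char*>(realloc(p, 0));            // frees the block; p is now NULL
}
#endif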

extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) {
  void* p = cpp_alloc(size, false);
  // We keep this next instruction out of cpp_alloc for a reason: when
  // it's in, and new just calls cpp_alloc, the optimizer may fold the
  // new call into cpp_alloc, which messes up our whole section-based
  // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
  // isn't the last thing this fn calls, and prevents the folding.
  MallocHook::InvokeNewHook(p, size);
  return p;
}

extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW {
  void* p = cpp_alloc(size, true);
  MallocHook::InvokeNewHook(p, size);
  return p;
}

extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW {
  MallocHook::InvokeDeleteHook(p);
  do_free(p);
}

// Standard C++ library implementations define and use this
// (via ::operator delete(ptr, nothrow)).
// But it's really the same as normal delete, so we just do the same thing.
extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) __THROW {
  MallocHook::InvokeDeleteHook(p);
  do_free(p);
}

extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) {
  void* p = cpp_alloc(size, false);
  // We keep this next instruction out of cpp_alloc for a reason: when
  // it's in, and new just calls cpp_alloc, the optimizer may fold the
  // new call into cpp_alloc, which messes up our whole section-based
  // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
  // isn't the last thing this fn calls, and prevents the folding.
  MallocHook::InvokeNewHook(p, size);
  return p;
}

extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&)
    __THROW {
  void* p = cpp_alloc(size, true);
  MallocHook::InvokeNewHook(p, size);
  return p;
}

extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW {
  MallocHook::InvokeDeleteHook(p);
  do_free(p);
}

extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW {
  MallocHook::InvokeDeleteHook(p);
  do_free(p);
}

extern "C" PERFTOOLS_DLL_DECL void* tc_memalign(size_t align,
                                                size_t size) __THROW {
  void* result = do_memalign_or_cpp_memalign(align, size);
  MallocHook::InvokeNewHook(result, size);
  return result;
}

extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign(
    void** result_ptr, size_t align, size_t size) __THROW {
  if (((align % sizeof(void*)) != 0) ||
      ((align & (align - 1)) != 0) ||
      (align == 0)) {
    return EINVAL;
  }

  void* result = do_memalign_or_cpp_memalign(align, size);
  MallocHook::InvokeNewHook(result, size);
  if (result == NULL) {
    return ENOMEM;
  } else {
    *result_ptr = result;
    return 0;
  }
}
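
// A minimal caller-side sketch (not compiled into the build) of the alignment
// rules enforced above: align must be a power of two and a multiple of
// sizeof(void*); otherwise EINVAL is returned and *result_ptr is untouched.
#if 0
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static void PosixMemalignSketch() {
  void* p = NULL;
  int rc = posix_memalign(&p, 64, 1000);   // 64 is a power of two >= sizeof(void*)
  if (rc == 0) {
    free(p);                               // p was 64-byte aligned
  } else if (rc == EINVAL) {
    fprintf(stderr, "bad alignment request\n");
  } else if (rc == ENOMEM) {
    fprintf(stderr, "out of memory\n");
  }
}
#endif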

static size_t pagesize = 0;

extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) __THROW {
  // Allocate page-aligned object of length >= size bytes
  if (pagesize == 0) pagesize = getpagesize();
  void* result = do_memalign_or_cpp_memalign(pagesize, size);
  MallocHook::InvokeNewHook(result, size);
  return result;
}

extern "C" PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) __THROW {
  // Round up size to a multiple of pagesize
  if (pagesize == 0) pagesize = getpagesize();
  if (size == 0) {     // pvalloc(0) should allocate one page, according to
    size = pagesize;   // http://man.free4web.biz/man3/libmpatrol.3.html
  }
  size = (size + pagesize - 1) & ~(pagesize - 1);
  void* result = do_memalign_or_cpp_memalign(pagesize, size);
  MallocHook::InvokeNewHook(result, size);
  return result;
}
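
// A minimal sketch (not compiled into the build) of the page-rounding
// expression used by tc_pvalloc above, assuming pagesize is a power of two.
// With pagesize = 4096: size 1 -> 4096, size 4096 -> 4096, size 4097 -> 8192.
#if 0
#include <stddef.h>

static size_t RoundUpToPage(size_t size, size_t pagesize) {
  return (size + pagesize - 1) & ~(pagesize - 1);
}
#endif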

extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) __THROW {
  do_malloc_stats();
}

extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) __THROW {
  return do_mallopt(cmd, value);
}

#ifdef HAVE_STRUCT_MALLINFO
extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) __THROW {
  return do_mallinfo();
}
#endif

extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) __THROW {
  return MallocExtension::instance()->GetAllocatedSize(ptr);
}
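
// A minimal usage sketch (not compiled into the build): GetAllocatedSize()
// reports the usable size of a block, which may exceed the requested size
// because tcmalloc rounds requests up to a size class.  The header path is
// an assumption and may vary by install.
#if 0
#include <assert.h>
#include <stdlib.h>
#include <google/malloc_extension.h>

static void AllocatedSizeSketch() {
  void* p = malloc(100);
  size_t usable = MallocExtension::instance()->GetAllocatedSize(p);
  assert(usable >= 100);   // at least what was asked for; often a bit more
  free(p);
}
#endif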

#endif  // TCMALLOC_USING_DEBUGALLOCATION
