// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>

#include <config.h>
#ifdef HAVE_INTTYPES_H
#include <inttypes.h>                   // for PRIuPTR
#endif
#include "page_heap.h"         // for PageHeap (this file's own header)
#include <gperftools/malloc_extension.h>      // for MallocRange, etc
#include "base/basictypes.h"
#include "base/commandlineflags.h"
#include "internal_logging.h"  // for ASSERT, TCMalloc_Printer, etc
#include "page_heap_allocator.h"  // for PageHeapAllocator
#include "static_vars.h"       // for Static
#include "system-alloc.h"      // for TCMalloc_SystemAlloc, etc

DEFINE_double(tcmalloc_release_rate,
              EnvToDouble("TCMALLOC_RELEASE_RATE", 1.0),
              "Rate at which we release unused memory to the system.  "
              "Zero means we never release memory back to the system.  "
              "Increase this flag to return memory faster; decrease it "
              "to return memory slower.  Reasonable rates are in the "
              "range [0,10]");
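
// Illustrative usage note (not part of the build): as the EnvToDouble call
// above shows, the release rate is normally tuned through the environment,
// e.g.
//   TCMALLOC_RELEASE_RATE=5 ./my_binary   # "./my_binary" is a placeholder
// returns freed pages to the OS more aggressively than the default rate of
// 1, while TCMALLOC_RELEASE_RATE=0 disables the incremental release path
// entirely (see IncrementalScavenge below).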

namespace tcmalloc {

PageHeap::PageHeap()
    : pagemap_(MetaDataAlloc),
      pagemap_cache_(0),
      scavenge_counter_(0),
      // Start scavenging at kMaxPages list
      release_index_(kMaxPages) {
  COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
  DLL_Init(&large_.normal);
  DLL_Init(&large_.returned);
  for (int i = 0; i < kMaxPages; i++) {
    DLL_Init(&free_[i].normal);
    DLL_Init(&free_[i].returned);
  }
}

Span* PageHeap::SearchFreeAndLargeLists(Length n) {
  ASSERT(Check());
  ASSERT(n > 0);

  // Find first size >= n that has a non-empty list
  for (Length s = n; s < kMaxPages; s++) {
    Span* ll = &free_[s].normal;
    // If we're lucky, ll is non-empty, meaning it has a suitable span.
    if (!DLL_IsEmpty(ll)) {
      ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST);
      return Carve(ll->next, n);
    }
    // Alternatively, maybe there's a usable returned span.
    ll = &free_[s].returned;
    if (!DLL_IsEmpty(ll)) {
      ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST);
      return Carve(ll->next, n);
    }
  }
  // No luck in free lists, our last chance is in a larger class.
  return AllocLarge(n);  // May be NULL
}

Span* PageHeap::New(Length n) {
  ASSERT(Check());
  ASSERT(n > 0);

  Span* result = SearchFreeAndLargeLists(n);
  if (result != NULL)
    return result;

  // Grow the heap and try again.
  if (!GrowHeap(n)) {
    ASSERT(stats_.unmapped_bytes + stats_.committed_bytes == stats_.system_bytes);
    ASSERT(Check());
    return NULL;
  }
  return SearchFreeAndLargeLists(n);
}

Span* PageHeap::AllocLarge(Length n) {
  // Find the best span (closest to n in size).  The following loops
  // implement address-ordered best-fit.
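  //
  // Illustrative example (made-up sizes): if the candidate spans are 300,
  // 200 and 200 pages long, with the two 200-page spans at increasing
  // addresses, a request for n == 150 picks the 200-page span with the
  // lower starting address, and Carve() hands back a 150-page span plus a
  // 50-page leftover.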
  Span *best = NULL;

  // Search through normal list
  for (Span* span = large_.normal.next;
       span != &large_.normal;
       span = span->next) {
    if (span->length >= n) {
      if ((best == NULL)
          || (span->length < best->length)
          || ((span->length == best->length) && (span->start < best->start))) {
        best = span;
        ASSERT(best->location == Span::ON_NORMAL_FREELIST);
      }
    }
  }

  // Search through released list in case it has a better fit
  for (Span* span = large_.returned.next;
       span != &large_.returned;
       span = span->next) {
    if (span->length >= n) {
      if ((best == NULL)
          || (span->length < best->length)
          || ((span->length == best->length) && (span->start < best->start))) {
        best = span;
        ASSERT(best->location == Span::ON_RETURNED_FREELIST);
      }
    }
  }

  return best == NULL ? NULL : Carve(best, n);
}

Span* PageHeap::Split(Span* span, Length n) {
  ASSERT(0 < n);
  ASSERT(n < span->length);
  ASSERT(span->location == Span::IN_USE);
  ASSERT(span->sizeclass == 0);
  Event(span, 'T', n);

  const int extra = span->length - n;
  Span* leftover = NewSpan(span->start + n, extra);
  ASSERT(leftover->location == Span::IN_USE);
  Event(leftover, 'U', extra);
  RecordSpan(leftover);
  pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
  span->length = n;

  return leftover;
}

void PageHeap::CommitSpan(Span* span) {
  TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift),
                        static_cast<size_t>(span->length << kPageShift));
  stats_.committed_bytes += span->length << kPageShift;
}

void PageHeap::DecommitSpan(Span* span) {
  TCMalloc_SystemRelease(reinterpret_cast<void*>(span->start << kPageShift),
                         static_cast<size_t>(span->length << kPageShift));
  stats_.committed_bytes -= span->length << kPageShift;
}

Span* PageHeap::Carve(Span* span, Length n) {
  ASSERT(n > 0);
  ASSERT(span->location != Span::IN_USE);
  const int old_location = span->location;
  RemoveFromFreeList(span);
  span->location = Span::IN_USE;
  Event(span, 'A', n);

  const int extra = span->length - n;
  ASSERT(extra >= 0);
  if (extra > 0) {
    Span* leftover = NewSpan(span->start + n, extra);
    leftover->location = old_location;
    Event(leftover, 'S', extra);
    RecordSpan(leftover);

    // The previous span of |leftover| was just split off -- no need to
    // coalesce them.  The next span of |leftover| was not previously
    // coalesced with |span|, i.e. it is NULL or has a location other than
    // |old_location|.
    const PageID p = leftover->start;
    const Length len = leftover->length;
    Span* next = GetDescriptor(p+len);
    ASSERT(next == NULL ||
           next->location == Span::IN_USE ||
           next->location != leftover->location);

    PrependToFreeList(leftover);  // Skip coalescing - no candidates possible
    span->length = n;
    pagemap_.set(span->start + n - 1, span);
  }
  ASSERT(Check());
  if (old_location == Span::ON_RETURNED_FREELIST) {
    // We need to recommit this address space.
    CommitSpan(span);
  }
  ASSERT(span->location == Span::IN_USE);
  ASSERT(span->length == n);
  ASSERT(stats_.unmapped_bytes + stats_.committed_bytes == stats_.system_bytes);
  return span;
}

void PageHeap::Delete(Span* span) {
  ASSERT(Check());
  ASSERT(span->location == Span::IN_USE);
  ASSERT(span->length > 0);
  ASSERT(GetDescriptor(span->start) == span);
  ASSERT(GetDescriptor(span->start + span->length - 1) == span);
  const Length n = span->length;
  span->sizeclass = 0;
  span->sample = 0;
  span->location = Span::ON_NORMAL_FREELIST;
  Event(span, 'D', span->length);
  MergeIntoFreeList(span);  // Coalesces if possible
  IncrementalScavenge(n);
  ASSERT(stats_.unmapped_bytes + stats_.committed_bytes == stats_.system_bytes);
  ASSERT(Check());
}

void PageHeap::MergeIntoFreeList(Span* span) {
  ASSERT(span->location != Span::IN_USE);

  // Coalesce -- we guarantee that "p" != 0, so no bounds checking
  // necessary.  We do not bother resetting the stale pagemap
  // entries for the pieces we are merging together because we only
  // care about the pagemap entries for the boundaries.
  //
  // Note that the adjacent spans we merge into "span" may come out of a
  // "normal" (committed) list, and cleanly merge with our IN_USE span, which
  // is implicitly committed.  If the adjacent spans are on the "returned"
  // (decommitted) list, then we must get both spans into the same state before
  // or after we coalesce them.  The current code always decommits.  This is
  // achieved by blindly decommitting the entire coalesced region, which may
  // include any combination of committed and decommitted spans, at the end of
  // the method.

  // TODO(jar): "Always decommit" causes some extra calls to commit when we are
  // called in GrowHeap() during an allocation :-/.  We need to evaluate the
  // cost of that oscillation, and possibly do something to reduce it.

  // TODO(jar): We need a better strategy for deciding to commit, or decommit,
  // based on memory usage and free heap sizes.
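  //
  // Illustrative example (page numbers are made up): if |span| covers pages
  // [10,12), a committed ("normal") free span covers [8,10), and a
  // decommitted ("returned") free span covers [12,15), all three are merged
  // into one span covering [8,15), the whole region is decommitted, and the
  // result is prepended to the "returned" free list for 7-page spans.  The
  // stats_.committed_bytes adjustments below keep the accounting consistent
  // even though [12,15) was already decommitted.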

  const PageID p = span->start;
  const Length n = span->length;
  Span* prev = GetDescriptor(p-1);
  if (prev != NULL && prev->location != Span::IN_USE) {
    // Merge preceding span into this span
    ASSERT(prev->start + prev->length == p);
    const Length len = prev->length;
    if (prev->location == Span::ON_RETURNED_FREELIST) {
      // We're about to put the merged span onto the returned freelist and
      // call DecommitSpan() on it, which will mark the entire span, including
      // this one, as released and decrease stats_.committed_bytes by the size
      // of the merged span.  To make the math work out we temporarily
      // increase the stats_.committed_bytes amount.
      stats_.committed_bytes += prev->length << kPageShift;
    }
    RemoveFromFreeList(prev);
    DeleteSpan(prev);
    span->start -= len;
    span->length += len;
    pagemap_.set(span->start, span);
    Event(span, 'L', len);
  }
  Span* next = GetDescriptor(p+n);
  if (next != NULL && next->location != Span::IN_USE) {
    // Merge next span into this span
    ASSERT(next->start == p+n);
    const Length len = next->length;
    if (next->location == Span::ON_RETURNED_FREELIST) {
      // See the comment under 'if (prev->location ...)' above for an
      // explanation.
      stats_.committed_bytes += next->length << kPageShift;
    }
    RemoveFromFreeList(next);
    DeleteSpan(next);
    span->length += len;
    pagemap_.set(span->start + span->length - 1, span);
    Event(span, 'R', len);
  }

  Event(span, 'D', span->length);
  span->location = Span::ON_RETURNED_FREELIST;
  DecommitSpan(span);
  PrependToFreeList(span);
}

void PageHeap::PrependToFreeList(Span* span) {
  ASSERT(span->location != Span::IN_USE);
  SpanList* list = (span->length < kMaxPages) ? &free_[span->length] : &large_;
  if (span->location == Span::ON_NORMAL_FREELIST) {
    stats_.free_bytes += (span->length << kPageShift);
    DLL_Prepend(&list->normal, span);
  } else {
    stats_.unmapped_bytes += (span->length << kPageShift);
    DLL_Prepend(&list->returned, span);
  }
}

void PageHeap::RemoveFromFreeList(Span* span) {
  ASSERT(span->location != Span::IN_USE);
  if (span->location == Span::ON_NORMAL_FREELIST) {
    stats_.free_bytes -= (span->length << kPageShift);
  } else {
    stats_.unmapped_bytes -= (span->length << kPageShift);
  }
  DLL_Remove(span);
}

void PageHeap::IncrementalScavenge(Length n) {
  // Fast path; not yet time to release memory
  scavenge_counter_ -= n;
  if (scavenge_counter_ >= 0) return;  // Not yet time to scavenge

  const double rate = FLAGS_tcmalloc_release_rate;
  if (rate <= 1e-6) {
    // Tiny release rate means that releasing is disabled.
    scavenge_counter_ = kDefaultReleaseDelay;
    return;
  }

  Length released_pages = ReleaseAtLeastNPages(1);

  if (released_pages == 0) {
    // Nothing to scavenge, delay for a while.
    scavenge_counter_ = kDefaultReleaseDelay;
  } else {
    // Compute how long to wait until we return memory.
    // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages
    // after releasing one page.
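    //
    // For example (illustrative numbers): with the default rate of 1,
    // mult == 1000, so releasing 2 pages defers the next release until
    // roughly 2000 more pages have been deleted; with rate == 10,
    // mult == 100 and the next release happens after roughly 200 pages.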
    const double mult = 1000.0 / rate;
    double wait = mult * static_cast<double>(released_pages);
    if (wait > kMaxReleaseDelay) {
      // Avoid overflow and bound to reasonable range.
      wait = kMaxReleaseDelay;
    }
    scavenge_counter_ = static_cast<int64_t>(wait);
  }
}

Length PageHeap::ReleaseLastNormalSpan(SpanList* slist) {
  Span* s = slist->normal.prev;
  ASSERT(s->location == Span::ON_NORMAL_FREELIST);
  RemoveFromFreeList(s);
  const Length n = s->length;
  TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
                         static_cast<size_t>(s->length << kPageShift));
  s->location = Span::ON_RETURNED_FREELIST;
  MergeIntoFreeList(s);  // Coalesces if possible.
  return n;
}

Length PageHeap::ReleaseAtLeastNPages(Length num_pages) {
  Length released_pages = 0;
  Length prev_released_pages = -1;

  // Round robin through the lists of free spans, releasing the last
  // span in each list.  Stop after releasing at least num_pages.
  while (released_pages < num_pages) {
    if (released_pages == prev_released_pages) {
      // Last iteration of while loop made no progress.
      break;
    }
    prev_released_pages = released_pages;

    for (int i = 0; i < kMaxPages+1 && released_pages < num_pages;
         i++, release_index_++) {
      if (release_index_ > kMaxPages) release_index_ = 0;
      SpanList* slist = (release_index_ == kMaxPages) ?
          &large_ : &free_[release_index_];
      if (!DLL_IsEmpty(&slist->normal)) {
        Length released_len = ReleaseLastNormalSpan(slist);
        released_pages += released_len;
      }
    }
  }
  return released_pages;
}

void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
  // Associate span object with all interior pages as well
  ASSERT(span->location == Span::IN_USE);
  ASSERT(GetDescriptor(span->start) == span);
  ASSERT(GetDescriptor(span->start+span->length-1) == span);
  Event(span, 'C', sc);
  span->sizeclass = sc;
  for (Length i = 1; i < span->length-1; i++) {
    pagemap_.set(span->start+i, span);
  }
}

void PageHeap::GetSmallSpanStats(SmallSpanStats* result) {
  for (int s = 0; s < kMaxPages; s++) {
    result->normal_length[s] = DLL_Length(&free_[s].normal);
    result->returned_length[s] = DLL_Length(&free_[s].returned);
  }
}

void PageHeap::GetLargeSpanStats(LargeSpanStats* result) {
  result->spans = 0;
  result->normal_pages = 0;
  result->returned_pages = 0;
  for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
    result->normal_pages += s->length;
    result->spans++;
  }
  for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
    result->returned_pages += s->length;
    result->spans++;
  }
}

bool PageHeap::GetNextRange(PageID start, base::MallocRange* r) {
  Span* span = reinterpret_cast<Span*>(pagemap_.Next(start));
  if (span == NULL) {
    return false;
  }
  r->address = span->start << kPageShift;
  r->length = span->length << kPageShift;
  r->fraction = 0;
  switch (span->location) {
    case Span::IN_USE:
      r->type = base::MallocRange::INUSE;
      r->fraction = 1;
      if (span->sizeclass > 0) {
        // Only some of the objects in this span may be in use.
        const size_t osize = Static::sizemap()->class_to_size(span->sizeclass);
        r->fraction = (1.0 * osize * span->refcount) / r->length;
      }
      break;
    case Span::ON_NORMAL_FREELIST:
      r->type = base::MallocRange::FREE;
      break;
    case Span::ON_RETURNED_FREELIST:
      r->type = base::MallocRange::UNMAPPED;
      break;
    default:
      r->type = base::MallocRange::UNKNOWN;
      break;
  }
  return true;
}

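// Records a heap growth event: samples the current call stack and threads
// the resulting StackTrace onto the global list of growth stacks, stashing
// the previous list head in the last stack slot so no extra link field is
// needed.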
static void RecordGrowth(size_t growth) {
  StackTrace* t = Static::stacktrace_allocator()->New();
  t->depth = GetStackTrace(t->stack, kMaxStackDepth-1, 3);
  t->size = growth;
  t->stack[kMaxStackDepth-1] = reinterpret_cast<void*>(Static::growth_stacks());
  Static::set_growth_stacks(t);
}

bool PageHeap::GrowHeap(Length n) {
  ASSERT(kMaxPages >= kMinSystemAlloc);
  if (n > kMaxValidPages) return false;
  Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
  size_t actual_size;
  void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
  if (ptr == NULL) {
    if (n < ask) {
      // Try growing just "n" pages
      ask = n;
      ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
    }
    if (ptr == NULL) return false;
  }
  ask = actual_size >> kPageShift;
  RecordGrowth(ask << kPageShift);

  uint64_t old_system_bytes = stats_.system_bytes;
  stats_.system_bytes += (ask << kPageShift);
  stats_.committed_bytes += (ask << kPageShift);
  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  ASSERT(p > 0);

  // If we already have a lot of pages allocated, just preallocate a bunch of
  // memory for the page map.  This prevents fragmentation by pagemap metadata
  // when a program keeps allocating and freeing large blocks.

  if (old_system_bytes < kPageMapBigAllocationThreshold
      && stats_.system_bytes >= kPageMapBigAllocationThreshold) {
    pagemap_.PreallocateMoreMemory();
  }

  // Make sure pagemap_ has entries for all of the new pages.
  // Plus ensure one before and one after so coalescing code
  // does not need bounds-checking.
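  //
  // Concretely, the new region starts at page p and is "ask" pages long, so
  // Ensure(p-1, ask+2) guarantees pagemap_ entries for pages [p-1, p+ask];
  // GetDescriptor(p-1) and GetDescriptor(p+ask) in MergeIntoFreeList() are
  // then safe even at the edges of the new region.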
  if (pagemap_.Ensure(p-1, ask+2)) {
    // Pretend the new area is allocated and then Delete() it to cause
    // any necessary coalescing to occur.
    Span* span = NewSpan(p, ask);
    RecordSpan(span);
    Delete(span);
    ASSERT(stats_.unmapped_bytes + stats_.committed_bytes == stats_.system_bytes);
    ASSERT(Check());
    return true;
  } else {
    // We could not allocate memory within "pagemap_"
    // TODO: Once we can return memory to the system, return the new span
    return false;
  }
}

bool PageHeap::Check() {
  ASSERT(free_[0].normal.next == &free_[0].normal);
  ASSERT(free_[0].returned.next == &free_[0].returned);
  return true;
}

bool PageHeap::CheckExpensive() {
  bool result = Check();
  CheckList(&large_.normal, kMaxPages, 1000000000, Span::ON_NORMAL_FREELIST);
  CheckList(&large_.returned, kMaxPages, 1000000000, Span::ON_RETURNED_FREELIST);
  for (Length s = 1; s < kMaxPages; s++) {
    CheckList(&free_[s].normal, s, s, Span::ON_NORMAL_FREELIST);
    CheckList(&free_[s].returned, s, s, Span::ON_RETURNED_FREELIST);
  }
  return result;
}

bool PageHeap::CheckList(Span* list, Length min_pages, Length max_pages,
                         int freelist) {
  for (Span* s = list->next; s != list; s = s->next) {
    CHECK_CONDITION(s->location == freelist);  // NORMAL or RETURNED
    CHECK_CONDITION(s->length >= min_pages);
    CHECK_CONDITION(s->length <= max_pages);
    CHECK_CONDITION(GetDescriptor(s->start) == s);
    CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s);
  }
  return true;
}

}  // namespace tcmalloc