1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
18#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
19
20#include <stdint.h>
21#include <stdlib.h>
22#include <sys/mman.h>
23#include <memory>
24#include <set>
25#include <string>
26#include <unordered_set>
27#include <vector>
28
29#include <android-base/logging.h>
30
31#include "base/allocator.h"
32#include "base/bit_utils.h"
33#include "base/mutex.h"
34#include "globals.h"
35#include "thread.h"
36
37namespace art {
38
39class MemMap;
40
41namespace gc {
42namespace allocator {
43
44// A runs-of-slots memory allocator.
45class RosAlloc {
46 private:
47  // Represents a run of free pages.
48  class FreePageRun {
49   public:
50    uint8_t magic_num_;  // The magic number used for debugging only.
51
52    bool IsFree() const {
53      return !kIsDebugBuild || magic_num_ == kMagicNumFree;
54    }
55    size_t ByteSize(RosAlloc* rosalloc) const REQUIRES(rosalloc->lock_) {
56      const uint8_t* fpr_base = reinterpret_cast<const uint8_t*>(this);
57      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
58      size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
59      DCHECK_GE(byte_size, static_cast<size_t>(0));
60      DCHECK_ALIGNED(byte_size, kPageSize);
61      return byte_size;
62    }
63    void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
64        REQUIRES(rosalloc->lock_) {
65      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
66      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
67      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
68      rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
69    }
70    void* Begin() {
71      return reinterpret_cast<void*>(this);
72    }
73    void* End(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
74      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
75      uint8_t* end = fpr_base + ByteSize(rosalloc);
76      return end;
77    }
78    bool IsLargerThanPageReleaseThreshold(RosAlloc* rosalloc)
79        REQUIRES(rosalloc->lock_) {
80      return ByteSize(rosalloc) >= rosalloc->page_release_size_threshold_;
81    }
82    bool IsAtEndOfSpace(RosAlloc* rosalloc)
83        REQUIRES(rosalloc->lock_) {
84      return reinterpret_cast<uint8_t*>(this) + ByteSize(rosalloc) == rosalloc->base_ + rosalloc->footprint_;
85    }
86    bool ShouldReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
87      switch (rosalloc->page_release_mode_) {
88        case kPageReleaseModeNone:
89          return false;
90        case kPageReleaseModeEnd:
91          return IsAtEndOfSpace(rosalloc);
92        case kPageReleaseModeSize:
93          return IsLargerThanPageReleaseThreshold(rosalloc);
94        case kPageReleaseModeSizeAndEnd:
95          return IsLargerThanPageReleaseThreshold(rosalloc) && IsAtEndOfSpace(rosalloc);
96        case kPageReleaseModeAll:
97          return true;
98        default:
99          LOG(FATAL) << "Unexpected page release mode ";
100          return false;
101      }
102    }
103    void ReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
104      uint8_t* start = reinterpret_cast<uint8_t*>(this);
105      size_t byte_size = ByteSize(rosalloc);
106      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
107      if (ShouldReleasePages(rosalloc)) {
108        rosalloc->ReleasePageRange(start, start + byte_size);
109      }
110    }
111
112   private:
113    DISALLOW_COPY_AND_ASSIGN(FreePageRun);
114  };
115
116  // The slot header.
117  class Slot {
118   public:
119    Slot* Next() const {
120      return next_;
121    }
122    void SetNext(Slot* next) {
123      next_ = next;
124    }
125    // The slot right before this slot in terms of the address.
126    Slot* Left(size_t bracket_size) {
127      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) - bracket_size);
128    }
129    void Clear() {
130      next_ = nullptr;
131    }
132
133   private:
134    Slot* next_;  // Next slot in the list.
135    friend class RosAlloc;
136  };
137
138  // We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need to
139  // traverse the list from the head to the tail when merging free lists.
140  // We don't use the tail (kUseTail == false) for the free list to avoid the need to manage the
141  // tail in the allocation fast path for a performance reason.
142  template<bool kUseTail = true>
143  class SlotFreeList {
144   public:
145    SlotFreeList() : head_(0U), tail_(0), size_(0), padding_(0) {}
146    Slot* Head() const {
147      return reinterpret_cast<Slot*>(head_);
148    }
149    Slot* Tail() const {
150      CHECK(kUseTail);
151      return reinterpret_cast<Slot*>(tail_);
152    }
153    size_t Size() const {
154      return size_;
155    }
156    // Removes from the head of the free list.
157    Slot* Remove() {
158      Slot* slot;
159      if (kIsDebugBuild) {
160        Verify();
161      }
162      Slot** headp = reinterpret_cast<Slot**>(&head_);
163      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
164      Slot* old_head = *headp;
165      if (old_head == nullptr) {
166        // List was empty.
167        if (kUseTail) {
168          DCHECK(*tailp == nullptr);
169        }
170        return nullptr;
171      } else {
172        // List wasn't empty.
173        if (kUseTail) {
174          DCHECK(*tailp != nullptr);
175        }
176        Slot* old_head_next = old_head->Next();
177        slot = old_head;
178        *headp = old_head_next;
179        if (kUseTail && old_head_next == nullptr) {
180          // List becomes empty.
181          *tailp = nullptr;
182        }
183      }
184      slot->Clear();
185      --size_;
186      if (kIsDebugBuild) {
187        Verify();
188      }
189      return slot;
190    }
191    void Add(Slot* slot) {
192      if (kIsDebugBuild) {
193        Verify();
194      }
195      DCHECK(slot != nullptr);
196      DCHECK(slot->Next() == nullptr);
197      Slot** headp = reinterpret_cast<Slot**>(&head_);
198      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
199      Slot* old_head = *headp;
200      if (old_head == nullptr) {
201        // List was empty.
202        if (kUseTail) {
203          DCHECK(*tailp == nullptr);
204        }
205        *headp = slot;
206        if (kUseTail) {
207          *tailp = slot;
208        }
209      } else {
210        // List wasn't empty.
211        if (kUseTail) {
212          DCHECK(*tailp != nullptr);
213        }
214        *headp = slot;
215        slot->SetNext(old_head);
216      }
217      ++size_;
218      if (kIsDebugBuild) {
219        Verify();
220      }
221    }
222    // Merge the given list into this list. Empty the given list.
223    // Deliberately support only a kUseTail == true SlotFreeList parameter because 1) we don't
224    // currently have a situation where we need a kUseTail == false SlotFreeList parameter, and 2)
225    // supporting the kUseTail == false parameter would require a O(n) linked list traversal to do
226    // the merge if 'this' SlotFreeList has kUseTail == false, which we'd like to avoid.
227    void Merge(SlotFreeList<true>* list) {
228      if (kIsDebugBuild) {
229        Verify();
230        CHECK(list != nullptr);
231        list->Verify();
232      }
233      if (list->Size() == 0) {
234        return;
235      }
236      Slot** headp = reinterpret_cast<Slot**>(&head_);
237      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
238      Slot* old_head = *headp;
239      if (old_head == nullptr) {
240        // List was empty.
241        *headp = list->Head();
242        if (kUseTail) {
243          *tailp = list->Tail();
244        }
245        size_ = list->Size();
246      } else {
247        // List wasn't empty.
248        DCHECK(list->Head() != nullptr);
249        *headp = list->Head();
250        DCHECK(list->Tail() != nullptr);
251        list->Tail()->SetNext(old_head);
252        // if kUseTail, no change to tailp.
253        size_ += list->Size();
254      }
255      list->Reset();
256      if (kIsDebugBuild) {
257        Verify();
258      }
259    }
260
261    void Reset() {
262      head_ = 0;
263      if (kUseTail) {
264        tail_ = 0;
265      }
266      size_ = 0;
267    }
268
269    void Verify() {
270      Slot* head = reinterpret_cast<Slot*>(head_);
271      Slot* tail = kUseTail ? reinterpret_cast<Slot*>(tail_) : nullptr;
272      if (size_ == 0) {
273        CHECK(head == nullptr);
274        if (kUseTail) {
275          CHECK(tail == nullptr);
276        }
277      } else {
278        CHECK(head != nullptr);
279        if (kUseTail) {
280          CHECK(tail != nullptr);
281        }
282        size_t count = 0;
283        for (Slot* slot = head; slot != nullptr; slot = slot->Next()) {
284          ++count;
285          if (kUseTail && slot->Next() == nullptr) {
286            CHECK_EQ(slot, tail);
287          }
288        }
289        CHECK_EQ(size_, count);
290      }
291    }
292
293   private:
294    // A pointer (Slot*) to the head of the list. Always 8 bytes so that we will have the same
295    // layout between 32 bit and 64 bit, which is not strictly necessary, but we do so for 1)
296    // uniformity, 2) we won't need to change this code if we move to a non-low 4G heap in the
297    // future, and 3) the space savings by using 32 bit fields in 32 bit would be lost in noise
298    // (won't open up enough space to cause an extra slot to be available).
299    uint64_t head_;
300    // A pointer (Slot*) to the tail of the list. Always 8 bytes so that we will have the same
301    // layout between 32 bit and 64 bit. The tail is stored to speed up merging of lists.
302    // Unused if kUseTail is false.
303    uint64_t tail_;
304    // The number of slots in the list. This is used to make it fast to check if a free list is all
305    // free without traversing the whole free list.
306    uint32_t size_;
307    uint32_t padding_ ATTRIBUTE_UNUSED;
308    friend class RosAlloc;
309  };
310
311  // Represents a run of memory slots of the same size.
312  //
313  // A run's memory layout:
314  //
315  // +-------------------+
316  // | magic_num         |
317  // +-------------------+
318  // | size_bracket_idx  |
319  // +-------------------+
320  // | is_thread_local   |
321  // +-------------------+
322  // | to_be_bulk_freed  |
323  // +-------------------+
324  // |                   |
325  // | free list         |
326  // |                   |
327  // +-------------------+
328  // |                   |
329  // | bulk free list    |
330  // |                   |
331  // +-------------------+
332  // |                   |
333  // | thread-local free |
334  // | list              |
335  // |                   |
336  // +-------------------+
337  // | padding due to    |
338  // | alignment         |
339  // +-------------------+
340  // | slot 0            |
341  // +-------------------+
342  // | slot 1            |
343  // +-------------------+
344  // | slot 2            |
345  // +-------------------+
346  // ...
347  // +-------------------+
348  // | last slot         |
349  // +-------------------+
350  //
351  class Run {
352   public:
353    uint8_t magic_num_;                 // The magic number used for debugging.
354    uint8_t size_bracket_idx_;          // The index of the size bracket of this run.
355    uint8_t is_thread_local_;           // True if this run is used as a thread-local run.
356    uint8_t to_be_bulk_freed_;          // Used within BulkFree() to flag a run that's involved with a bulk free.
357    uint32_t padding_ ATTRIBUTE_UNUSED;
358    // Use a tailless free list for free_list_ so that the alloc fast path does not manage the tail.
359    SlotFreeList<false> free_list_;
360    SlotFreeList<true> bulk_free_list_;
361    SlotFreeList<true> thread_local_free_list_;
362    // Padding due to alignment
363    // Slot 0
364    // Slot 1
365    // ...
366
367    // Returns the byte size of the header.
368    static size_t fixed_header_size() {
369      return sizeof(Run);
370    }
371    Slot* FirstSlot() const {
372      const uint8_t idx = size_bracket_idx_;
373      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]);
374    }
375    Slot* LastSlot() {
376      const uint8_t idx = size_bracket_idx_;
377      const size_t bracket_size = bracketSizes[idx];
378      uintptr_t end = reinterpret_cast<uintptr_t>(End());
379      Slot* last_slot = reinterpret_cast<Slot*>(end - bracket_size);
380      DCHECK_LE(FirstSlot(), last_slot);
381      return last_slot;
382    }
383    SlotFreeList<false>* FreeList() {
384      return &free_list_;
385    }
386    SlotFreeList<true>* BulkFreeList() {
387      return &bulk_free_list_;
388    }
389    SlotFreeList<true>* ThreadLocalFreeList() {
390      return &thread_local_free_list_;
391    }
392    void* End() {
393      return reinterpret_cast<uint8_t*>(this) + kPageSize * numOfPages[size_bracket_idx_];
394    }
395    void SetIsThreadLocal(bool is_thread_local) {
396      is_thread_local_  = is_thread_local ? 1 : 0;
397    }
398    bool IsThreadLocal() const {
399      return is_thread_local_ != 0;
400    }
401    // Set up the free list for a new/empty run.
402    void InitFreeList() {
403      const uint8_t idx = size_bracket_idx_;
404      const size_t bracket_size = bracketSizes[idx];
405      Slot* first_slot = FirstSlot();
406      // Add backwards so the first slot is at the head of the list.
407      for (Slot* slot = LastSlot(); slot >= first_slot; slot = slot->Left(bracket_size)) {
408        free_list_.Add(slot);
409      }
410    }
411    // Merge the thread local free list to the free list.  Used when a thread-local run becomes
412    // full.
413    bool MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out);
414    // Merge the bulk free list to the free list. Used in a bulk free.
415    void MergeBulkFreeListToFreeList();
416    // Merge the bulk free list to the thread local free list. In a bulk free, as a two-step
417    // process, GC will first record all the slots to free in a run in the bulk free list where it
418    // can write without a lock, and later acquire a lock once per run to merge the bulk free list
419    // to the thread-local free list.
420    void MergeBulkFreeListToThreadLocalFreeList();
421    // Allocates a slot in a run.
422    ALWAYS_INLINE void* AllocSlot();
423    // Frees a slot in a run. This is used in a non-bulk free.
424    void FreeSlot(void* ptr);
425    // Add the given slot to the bulk free list. Returns the bracket size.
426    size_t AddToBulkFreeList(void* ptr);
427    // Add the given slot to the thread-local free list.
428    void AddToThreadLocalFreeList(void* ptr);
429    // Returns true if all the slots in the run are not in use.
430    bool IsAllFree() const {
431      return free_list_.Size() == numOfSlots[size_bracket_idx_];
432    }
433    // Returns the number of free slots.
434    size_t NumberOfFreeSlots() {
435      return free_list_.Size();
436    }
437    // Returns true if all the slots in the run are in use.
438    ALWAYS_INLINE bool IsFull();
439    // Returns true if the bulk free list is empty.
440    bool IsBulkFreeListEmpty() const {
441      return bulk_free_list_.Size() == 0;
442    }
443    // Returns true if the thread local free list is empty.
444    bool IsThreadLocalFreeListEmpty() const {
445      return thread_local_free_list_.Size() == 0;
446    }
447    // Zero the run's data.
448    void ZeroData();
449    // Zero the run's header and the slot headers.
450    void ZeroHeaderAndSlotHeaders();
451    // Iterate over all the slots and apply the given function.
452    void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
453    // Dump the run metadata for debugging.
454    std::string Dump();
455    // Verify for debugging.
456    void Verify(Thread* self, RosAlloc* rosalloc, bool running_on_memory_tool)
457        REQUIRES(Locks::mutator_lock_)
458        REQUIRES(Locks::thread_list_lock_);
459
460   private:
461    // The common part of AddToBulkFreeList() and AddToThreadLocalFreeList(). Returns the bracket
462    // size.
463    size_t AddToFreeListShared(void* ptr, SlotFreeList<true>* free_list, const char* caller_name);
464    // Turns a FreeList into a string for debugging.
465    template<bool kUseTail>
466    std::string FreeListToStr(SlotFreeList<kUseTail>* free_list);
467    // Check a given pointer is a valid slot address and return it as Slot*.
468    Slot* ToSlot(void* ptr) {
469      const uint8_t idx = size_bracket_idx_;
470      const size_t bracket_size = bracketSizes[idx];
471      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
472          - reinterpret_cast<uint8_t*>(FirstSlot());
473      DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
474      size_t slot_idx = offset_from_slot_base / bracket_size;
475      DCHECK_LT(slot_idx, numOfSlots[idx]);
476      return reinterpret_cast<Slot*>(ptr);
477    }
478    size_t SlotIndex(Slot* slot) const {
479      const uint8_t idx = size_bracket_idx_;
480      const size_t bracket_size = bracketSizes[idx];
481      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
482          - reinterpret_cast<uint8_t*>(FirstSlot());
483      DCHECK_EQ(offset_from_slot_base % bracket_size, 0U);
484      size_t slot_idx = offset_from_slot_base / bracket_size;
485      DCHECK_LT(slot_idx, numOfSlots[idx]);
486      return slot_idx;
487    }
488
489    // TODO: DISALLOW_COPY_AND_ASSIGN(Run);
490  };
491
492  // The magic number for a run.
493  static constexpr uint8_t kMagicNum = 42;
494  // The magic number for free pages.
495  static constexpr uint8_t kMagicNumFree = 43;
496  // The number of size brackets.
497  static constexpr size_t kNumOfSizeBrackets = 42;
498  // The sizes (the slot sizes, in bytes) of the size brackets.
499  static size_t bracketSizes[kNumOfSizeBrackets];
500  // The numbers of pages that are used for runs for each size bracket.
501  static size_t numOfPages[kNumOfSizeBrackets];
502  // The numbers of slots of the runs for each size bracket.
503  static size_t numOfSlots[kNumOfSizeBrackets];
504  // The header sizes in bytes of the runs for each size bracket.
505  static size_t headerSizes[kNumOfSizeBrackets];
506
507  // Initialize the run specs (the above arrays).
508  static void Initialize();
509  static bool initialized_;
510
511  // Returns the byte size of the bracket size from the index.
512  static size_t IndexToBracketSize(size_t idx) {
513    DCHECK_LT(idx, kNumOfSizeBrackets);
514    return bracketSizes[idx];
515  }
516  // Returns the index of the size bracket from the bracket size.
517  static size_t BracketSizeToIndex(size_t size) {
518    DCHECK(8 <= size &&
519           ((size <= kMaxThreadLocalBracketSize && size % kThreadLocalBracketQuantumSize == 0) ||
520            (size <= kMaxRegularBracketSize && size % kBracketQuantumSize == 0) ||
521            size == 1 * KB || size == 2 * KB));
522    size_t idx;
523    if (UNLIKELY(size == 1 * KB)) {
524      idx = kNumOfSizeBrackets - 2;
525    } else if (UNLIKELY(size == 2 * KB)) {
526      idx = kNumOfSizeBrackets - 1;
527    } else if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
528      DCHECK_EQ(size % kThreadLocalBracketQuantumSize, 0U);
529      idx = size / kThreadLocalBracketQuantumSize - 1;
530    } else {
531      DCHECK(size <= kMaxRegularBracketSize);
532      DCHECK_EQ((size - kMaxThreadLocalBracketSize) % kBracketQuantumSize, 0U);
533      idx = ((size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
534          + kNumThreadLocalSizeBrackets;
535    }
536    DCHECK(bracketSizes[idx] == size);
537    return idx;
538  }
539  // Returns true if the given allocation size is for a thread local allocation.
540  static bool IsSizeForThreadLocal(size_t size) {
541    bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize;
542    DCHECK(size > kLargeSizeThreshold ||
543           (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
544    return is_size_for_thread_local;
545  }
546  // Rounds up the size up the nearest bracket size.
547  static size_t RoundToBracketSize(size_t size) {
548    DCHECK(size <= kLargeSizeThreshold);
549    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
550      return RoundUp(size, kThreadLocalBracketQuantumSize);
551    } else if (size <= kMaxRegularBracketSize) {
552      return RoundUp(size, kBracketQuantumSize);
553    } else if (UNLIKELY(size <= 1 * KB)) {
554      return 1 * KB;
555    } else {
556      DCHECK_LE(size, 2 * KB);
557      return 2 * KB;
558    }
559  }
560  // Returns the size bracket index from the byte size with rounding.
561  static size_t SizeToIndex(size_t size) {
562    DCHECK(size <= kLargeSizeThreshold);
563    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
564      return RoundUp(size, kThreadLocalBracketQuantumSize) / kThreadLocalBracketQuantumSize - 1;
565    } else if (size <= kMaxRegularBracketSize) {
566      return (RoundUp(size, kBracketQuantumSize) - kMaxThreadLocalBracketSize) / kBracketQuantumSize
567          - 1 + kNumThreadLocalSizeBrackets;
568    } else if (size <= 1 * KB) {
569      return kNumOfSizeBrackets - 2;
570    } else {
571      DCHECK_LE(size, 2 * KB);
572      return kNumOfSizeBrackets - 1;
573    }
574  }
575  // A combination of SizeToIndex() and RoundToBracketSize().
576  static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
577    DCHECK(size <= kLargeSizeThreshold);
578    size_t idx;
579    size_t bracket_size;
580    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
581      bracket_size = RoundUp(size, kThreadLocalBracketQuantumSize);
582      idx = bracket_size / kThreadLocalBracketQuantumSize - 1;
583    } else if (size <= kMaxRegularBracketSize) {
584      bracket_size = RoundUp(size, kBracketQuantumSize);
585      idx = ((bracket_size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
586          + kNumThreadLocalSizeBrackets;
587    } else if (size <= 1 * KB) {
588      bracket_size = 1 * KB;
589      idx = kNumOfSizeBrackets - 2;
590    } else {
591      DCHECK(size <= 2 * KB);
592      bracket_size = 2 * KB;
593      idx = kNumOfSizeBrackets - 1;
594    }
595    DCHECK_EQ(idx, SizeToIndex(size)) << idx;
596    DCHECK_EQ(bracket_size, IndexToBracketSize(idx)) << idx;
597    DCHECK_EQ(bracket_size, bracketSizes[idx]) << idx;
598    DCHECK_LE(size, bracket_size) << idx;
599    DCHECK(size > kMaxRegularBracketSize ||
600           (size <= kMaxThreadLocalBracketSize &&
601            bracket_size - size < kThreadLocalBracketQuantumSize) ||
602           (size <= kMaxRegularBracketSize && bracket_size - size < kBracketQuantumSize)) << idx;
603    *bracket_size_out = bracket_size;
604    return idx;
605  }
606
607  // Returns the page map index from an address. Requires that the
608  // address is page size aligned.
609  size_t ToPageMapIndex(const void* addr) const {
610    DCHECK_LE(base_, addr);
611    DCHECK_LT(addr, base_ + capacity_);
612    size_t byte_offset = reinterpret_cast<const uint8_t*>(addr) - base_;
613    DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
614    return byte_offset / kPageSize;
615  }
616  // Returns the page map index from an address with rounding.
617  size_t RoundDownToPageMapIndex(const void* addr) const {
618    DCHECK(base_ <= addr && addr < reinterpret_cast<uint8_t*>(base_) + capacity_);
619    return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
620  }
621
622  // A memory allocation request larger than this size is treated as a large object and allocated
623  // at a page-granularity.
624  static const size_t kLargeSizeThreshold = 2048;
625
626  // If true, check that the returned memory is actually zero.
627  static constexpr bool kCheckZeroMemory = kIsDebugBuild;
628  // Valgrind protects memory, so do not check memory when running under valgrind. In a normal
629  // build with kCheckZeroMemory the whole test should be optimized away.
630  // TODO: Unprotect before checks.
631  ALWAYS_INLINE bool ShouldCheckZeroMemory();
632
633  // If true, log verbose details of operations.
634  static constexpr bool kTraceRosAlloc = false;
635
636  struct hash_run {
637    size_t operator()(const RosAlloc::Run* r) const {
638      return reinterpret_cast<size_t>(r);
639    }
640  };
641
642  struct eq_run {
643    bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
644      return r1 == r2;
645    }
646  };
647
648 public:
649  // Different page release modes.
650  enum PageReleaseMode {
651    kPageReleaseModeNone,         // Release no empty pages.
652    kPageReleaseModeEnd,          // Release empty pages at the end of the space.
653    kPageReleaseModeSize,         // Release empty pages that are larger than the threshold.
654    kPageReleaseModeSizeAndEnd,   // Release empty pages that are larger than the threshold or
655                                  // at the end of the space.
656    kPageReleaseModeAll,          // Release all empty pages.
657  };
658
659  // The default value for page_release_size_threshold_.
660  static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;
661
662  // We use thread-local runs for the size brackets whose indexes
663  // are less than this index. We use shared (current) runs for the rest.
664  // Sync this with the length of Thread::rosalloc_runs_.
665  static const size_t kNumThreadLocalSizeBrackets = 16;
666  static_assert(kNumThreadLocalSizeBrackets == kNumRosAllocThreadLocalSizeBracketsInThread,
667                "Mismatch between kNumThreadLocalSizeBrackets and "
668                "kNumRosAllocThreadLocalSizeBracketsInThread");
669
670  // The size of the largest bracket we use thread-local runs for.
671  // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
672  static const size_t kMaxThreadLocalBracketSize = 128;
673
674  // We use regular (8 or 16-bytes increment) runs for the size brackets whose indexes are less than
675  // this index.
676  static const size_t kNumRegularSizeBrackets = 40;
677
678  // The size of the largest regular (8 or 16-byte increment) bracket. Non-regular brackets are the
679  // 1 KB and the 2 KB brackets. This should be equal to bracketSizes[kNumRegularSizeBrackets - 1].
680  static const size_t kMaxRegularBracketSize = 512;
681
682  // The bracket size increment for the thread-local brackets (<= kMaxThreadLocalBracketSize bytes).
683  static constexpr size_t kThreadLocalBracketQuantumSize = 8;
684
685  // Equal to Log2(kThreadLocalBracketQuantumSize).
686  static constexpr size_t kThreadLocalBracketQuantumSizeShift = 3;
687
688  // The bracket size increment for the non-thread-local, regular brackets (of size <=
689  // kMaxRegularBracketSize bytes and > kMaxThreadLocalBracketSize bytes).
690  static constexpr size_t kBracketQuantumSize = 16;
691
692  // Equal to Log2(kBracketQuantumSize).
693  static constexpr size_t kBracketQuantumSizeShift = 4;
694
695 private:
696  // The base address of the memory region that's managed by this allocator.
697  uint8_t* base_;
698
699  // The footprint in bytes of the currently allocated portion of the
700  // memory region.
701  size_t footprint_;
702
703  // The maximum footprint. The address, base_ + capacity_, indicates
704  // the end of the memory region that's currently managed by this allocator.
705  size_t capacity_;
706
707  // The maximum capacity. The address, base_ + max_capacity_, indicates
708  // the end of the memory region that's ever managed by this allocator.
709  size_t max_capacity_;
710
711  template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
712  using AllocationTrackingSet = std::set<Key, Compare, TrackingAllocator<Key, kTag>>;
713
714  // The run sets that hold the runs whose slots are not all
715  // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
716  AllocationTrackingSet<Run*, kAllocatorTagRosAlloc> non_full_runs_[kNumOfSizeBrackets];
717  // The run sets that hold the runs whose slots are all full. This is
718  // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
719  std::unordered_set<Run*, hash_run, eq_run, TrackingAllocator<Run*, kAllocatorTagRosAlloc>>
720      full_runs_[kNumOfSizeBrackets];
721  // The set of free pages.
722  AllocationTrackingSet<FreePageRun*, kAllocatorTagRosAlloc> free_page_runs_ GUARDED_BY(lock_);
723  // The dedicated full run, it is always full and shared by all threads when revoking happens.
724  // This is an optimization since enables us to avoid a null check for revoked runs.
725  static Run* dedicated_full_run_;
726  // Using size_t to ensure that it is at least word aligned.
727  static size_t dedicated_full_run_storage_[];
728  // The current runs where the allocations are first attempted for
729  // the size brackes that do not use thread-local
730  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
731  Run* current_runs_[kNumOfSizeBrackets];
732  // The mutexes, one per size bracket.
733  Mutex* size_bracket_locks_[kNumOfSizeBrackets];
734  // Bracket lock names (since locks only have char* names).
735  std::string size_bracket_lock_names_[kNumOfSizeBrackets];
736  // The types of page map entries.
737  enum PageMapKind {
738    kPageMapReleased = 0,     // Zero and released back to the OS.
739    kPageMapEmpty,            // Zero but probably dirty.
740    kPageMapRun,              // The beginning of a run.
741    kPageMapRunPart,          // The non-beginning part of a run.
742    kPageMapLargeObject,      // The beginning of a large object.
743    kPageMapLargeObjectPart,  // The non-beginning part of a large object.
744  };
745  // The table that indicates what pages are currently used for.
746  volatile uint8_t* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
747  size_t page_map_size_;
748  size_t max_page_map_size_;
749  std::unique_ptr<MemMap> page_map_mem_map_;
750
751  // The table that indicates the size of free page runs. These sizes
752  // are stored here to avoid storing in the free page header and
753  // release backing pages.
754  std::vector<size_t, TrackingAllocator<size_t, kAllocatorTagRosAlloc>> free_page_run_size_map_
755      GUARDED_BY(lock_);
756  // The global lock. Used to guard the page map, the free page set,
757  // and the footprint.
758  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
759  // The reader-writer lock to allow one bulk free at a time while
760  // allowing multiple individual frees at the same time. Also, this
761  // is used to avoid race conditions between BulkFree() and
762  // RevokeThreadLocalRuns() on the bulk free list.
763  ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
764
765  // The page release mode.
766  const PageReleaseMode page_release_mode_;
767  // Under kPageReleaseModeSize(AndEnd), if the free page run size is
768  // greater than or equal to this value, release pages.
769  const size_t page_release_size_threshold_;
770
771  // Whether this allocator is running under Valgrind.
772  bool is_running_on_memory_tool_;
773
774  // The base address of the memory region that's managed by this allocator.
775  uint8_t* Begin() { return base_; }
776  // The end address of the memory region that's managed by this allocator.
777  uint8_t* End() { return base_ + capacity_; }
778
779  // Page-granularity alloc/free
780  void* AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type)
781      REQUIRES(lock_);
782  // Returns how many bytes were freed.
783  size_t FreePages(Thread* self, void* ptr, bool already_zero) REQUIRES(lock_);
784
785  // Allocate/free a run slot.
786  void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
787                     size_t* bytes_tl_bulk_allocated)
788      REQUIRES(!lock_);
789  // Allocate/free a run slot without acquiring locks.
790  // TODO: REQUIRES(Locks::mutator_lock_)
791  void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
792                                 size_t* usable_size, size_t* bytes_tl_bulk_allocated)
793      REQUIRES(!lock_);
794  void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx) REQUIRES(!lock_);
795
796  // Returns the bracket size.
797  size_t FreeFromRun(Thread* self, void* ptr, Run* run)
798      REQUIRES(!lock_);
799
800  // Used to allocate a new thread local run for a size bracket.
801  Run* AllocRun(Thread* self, size_t idx) REQUIRES(!lock_);
802
803  // Used to acquire a new/reused run for a size bracket. Used when a
804  // thread-local or current run gets full.
805  Run* RefillRun(Thread* self, size_t idx) REQUIRES(!lock_);
806
807  // The internal of non-bulk Free().
808  size_t FreeInternal(Thread* self, void* ptr) REQUIRES(!lock_);
809
810  // Allocates large objects.
811  void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated,
812                         size_t* usable_size, size_t* bytes_tl_bulk_allocated)
813      REQUIRES(!lock_);
814
815  // Revoke a run by adding it to non_full_runs_ or freeing the pages.
816  void RevokeRun(Thread* self, size_t idx, Run* run) REQUIRES(!lock_);
817
818  // Revoke the current runs which share an index with the thread local runs.
819  void RevokeThreadUnsafeCurrentRuns() REQUIRES(!lock_);
820
821  // Release a range of pages.
822  size_t ReleasePageRange(uint8_t* start, uint8_t* end) REQUIRES(lock_);
823
824  // Dumps the page map for debugging.
825  std::string DumpPageMap() REQUIRES(lock_);
826
827 public:
828  RosAlloc(void* base, size_t capacity, size_t max_capacity,
829           PageReleaseMode page_release_mode,
830           bool running_on_memory_tool,
831           size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
832  ~RosAlloc();
833
834  static size_t RunFreeListOffset() {
835    return OFFSETOF_MEMBER(Run, free_list_);
836  }
837  static size_t RunFreeListHeadOffset() {
838    return OFFSETOF_MEMBER(SlotFreeList<false>, head_);
839  }
840  static size_t RunFreeListSizeOffset() {
841    return OFFSETOF_MEMBER(SlotFreeList<false>, size_);
842  }
843  static size_t RunSlotNextOffset() {
844    return OFFSETOF_MEMBER(Slot, next_);
845  }
846
847  // If kThreadUnsafe is true then the allocator may avoid acquiring some locks as an optimization.
848  // If used, this may cause race conditions if multiple threads are allocating at the same time.
849  template<bool kThreadSafe = true>
850  void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
851              size_t* bytes_tl_bulk_allocated)
852      REQUIRES(!lock_);
853  size_t Free(Thread* self, void* ptr)
854      REQUIRES(!bulk_free_lock_, !lock_);
855  size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
856      REQUIRES(!bulk_free_lock_, !lock_);
857
858  // Returns true if the given allocation request can be allocated in
859  // an existing thread local run without allocating a new run.
860  ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size);
861  // Allocate the given allocation request in an existing thread local
862  // run without allocating a new run.
863  ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated);
864
865  // Returns the maximum bytes that could be allocated for the given
866  // size in bulk, that is the maximum value for the
867  // bytes_allocated_bulk out param returned by RosAlloc::Alloc().
868  ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size);
869
870  // Returns the size of the allocated slot for a given allocated memory chunk.
871  size_t UsableSize(const void* ptr) REQUIRES(!lock_);
872  // Returns the size of the allocated slot for a given size.
873  size_t UsableSize(size_t bytes) {
874    if (UNLIKELY(bytes > kLargeSizeThreshold)) {
875      return RoundUp(bytes, kPageSize);
876    } else {
877      return RoundToBracketSize(bytes);
878    }
879  }
880  // Try to reduce the current footprint by releasing the free page
881  // run at the end of the memory region, if any.
882  bool Trim() REQUIRES(!lock_);
883  // Iterates over all the memory slots and apply the given function.
884  void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
885                  void* arg)
886      REQUIRES(!lock_);
887
888  // Release empty pages.
889  size_t ReleasePages() REQUIRES(!lock_);
890  // Returns the current footprint.
891  size_t Footprint() REQUIRES(!lock_);
892  // Returns the current capacity, maximum footprint.
893  size_t FootprintLimit() REQUIRES(!lock_);
894  // Update the current capacity.
895  void SetFootprintLimit(size_t bytes) REQUIRES(!lock_);
896
897  // Releases the thread-local runs assigned to the given thread back to the common set of runs.
898  // Returns the total bytes of free slots in the revoked thread local runs. This is to be
899  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
900  size_t RevokeThreadLocalRuns(Thread* thread) REQUIRES(!lock_, !bulk_free_lock_);
901  // Releases the thread-local runs assigned to all the threads back to the common set of runs.
902  // Returns the total bytes of free slots in the revoked thread local runs. This is to be
903  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
904  size_t RevokeAllThreadLocalRuns() REQUIRES(!Locks::thread_list_lock_, !lock_, !bulk_free_lock_);
905  // Assert the thread local runs of a thread are revoked.
906  void AssertThreadLocalRunsAreRevoked(Thread* thread) REQUIRES(!bulk_free_lock_);
907  // Assert all the thread local runs are revoked.
908  void AssertAllThreadLocalRunsAreRevoked() REQUIRES(!Locks::thread_list_lock_, !bulk_free_lock_);
909
910  static Run* GetDedicatedFullRun() {
911    return dedicated_full_run_;
912  }
913  bool IsFreePage(size_t idx) const {
914    DCHECK_LT(idx, capacity_ / kPageSize);
915    uint8_t pm_type = page_map_[idx];
916    return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
917  }
918
919  // Callbacks for InspectAll that will count the number of bytes
920  // allocated and objects allocated, respectively.
921  static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
922  static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
923
924  bool DoesReleaseAllPages() const {
925    return page_release_mode_ == kPageReleaseModeAll;
926  }
927
928  // Verify for debugging.
929  void Verify() REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !bulk_free_lock_,
930                         !lock_);
931
932  void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes)
933      REQUIRES(!bulk_free_lock_, !lock_);
934
935  void DumpStats(std::ostream& os)
936      REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_) REQUIRES(!bulk_free_lock_);
937
938 private:
939  friend std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs);
940
941  DISALLOW_COPY_AND_ASSIGN(RosAlloc);
942};
943std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs);
944
945// Callback from rosalloc when it needs to increase the footprint. Must be implemented somewhere
946// else (currently rosalloc_space.cc).
947void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment);
948
949}  // namespace allocator
950}  // namespace gc
951}  // namespace art
952
953#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
954