/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_

#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <memory>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

#include "base/allocator.h"
#include "base/bit_utils.h"
#include "base/logging.h"
#include "base/mutex.h"
#include "globals.h"
#include "thread.h"

namespace art {

class MemMap;

namespace gc {
namespace allocator {

// A runs-of-slots memory allocator.
class RosAlloc {
 private:
  // Represents a run of free pages.
  class FreePageRun {
   public:
    uint8_t magic_num_;  // The magic number used for debugging only.

    bool IsFree() const {
      return !kIsDebugBuild || magic_num_ == kMagicNumFree;
    }
    size_t ByteSize(RosAlloc* rosalloc) const REQUIRES(rosalloc->lock_) {
      const uint8_t* fpr_base = reinterpret_cast<const uint8_t*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
      DCHECK_GE(byte_size, static_cast<size_t>(0));
      DCHECK_ALIGNED(byte_size, kPageSize);
      return byte_size;
    }
    void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
        REQUIRES(rosalloc->lock_) {
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
    }
    void* Begin() {
      return reinterpret_cast<void*>(this);
    }
    void* End(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
      uint8_t* end = fpr_base + ByteSize(rosalloc);
      return end;
    }
    bool IsLargerThanPageReleaseThreshold(RosAlloc* rosalloc)
        REQUIRES(rosalloc->lock_) {
      return ByteSize(rosalloc) >= rosalloc->page_release_size_threshold_;
    }
    bool IsAtEndOfSpace(RosAlloc* rosalloc)
        REQUIRES(rosalloc->lock_) {
      return reinterpret_cast<uint8_t*>(this) + ByteSize(rosalloc) == rosalloc->base_ + rosalloc->footprint_;
    }
    bool ShouldReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      switch (rosalloc->page_release_mode_) {
        case kPageReleaseModeNone:
          return false;
        case kPageReleaseModeEnd:
          return IsAtEndOfSpace(rosalloc);
        case kPageReleaseModeSize:
          return IsLargerThanPageReleaseThreshold(rosalloc);
        case kPageReleaseModeSizeAndEnd:
          return IsLargerThanPageReleaseThreshold(rosalloc) && IsAtEndOfSpace(rosalloc);
        case kPageReleaseModeAll:
          return true;
        default:
          LOG(FATAL) << "Unexpected page release mode: "
                     << static_cast<int>(rosalloc->page_release_mode_);
          return false;
      }
    }
    void ReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      uint8_t* start = reinterpret_cast<uint8_t*>(this);
      size_t byte_size = ByteSize(rosalloc);
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      if (ShouldReleasePages(rosalloc)) {
        rosalloc->ReleasePageRange(start, start + byte_size);
      }
    }

   private:
    DISALLOW_COPY_AND_ASSIGN(FreePageRun);
  };
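
  // A hedged example of the FreePageRun bookkeeping above (illustrative values,
  // assuming a 4 KB kPageSize): a free run of 3 pages starting at base_ + 5 * kPageSize
  // is represented by a FreePageRun placed at that address, with its size stored
  // externally in free_page_run_size_map_ rather than in the run itself:
  //
  //   FreePageRun* fpr = ...;                     // at base_ + 5 * kPageSize
  //   fpr->SetByteSize(rosalloc, 3 * kPageSize);  // free_page_run_size_map_[5] = 12288
  //   fpr->End(rosalloc);                         // == base_ + 8 * kPageSize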

  // The slot header.
  class Slot {
   public:
    Slot* Next() const {
      return next_;
    }
    void SetNext(Slot* next) {
      next_ = next;
    }
    // The slot right before this slot in terms of the address.
    Slot* Left(size_t bracket_size) {
      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) - bracket_size);
    }
    void Clear() {
      next_ = nullptr;
    }

   private:
    Slot* next_;  // Next slot in the list.
    friend class RosAlloc;
  };

  // We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need to
  // traverse the list from the head to the tail when merging free lists.
  // We don't use the tail (kUseTail == false) for the free list to avoid the need to manage the
  // tail in the allocation fast path for a performance reason.
  template<bool kUseTail = true>
  class SlotFreeList {
   public:
    SlotFreeList() : head_(0U), tail_(0U), size_(0), padding_(0) {}
    Slot* Head() const {
      return reinterpret_cast<Slot*>(head_);
    }
    Slot* Tail() const {
      CHECK(kUseTail);
      return reinterpret_cast<Slot*>(tail_);
    }
    size_t Size() const {
      return size_;
    }
    // Removes from the head of the free list.
    Slot* Remove() {
      Slot* slot;
      if (kIsDebugBuild) {
        Verify();
      }
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        if (kUseTail) {
          DCHECK(*tailp == nullptr);
        }
        return nullptr;
      } else {
        // List wasn't empty.
        if (kUseTail) {
          DCHECK(*tailp != nullptr);
        }
        Slot* old_head_next = old_head->Next();
        slot = old_head;
        *headp = old_head_next;
        if (kUseTail && old_head_next == nullptr) {
          // List becomes empty.
          *tailp = nullptr;
        }
      }
      slot->Clear();
      --size_;
      if (kIsDebugBuild) {
        Verify();
      }
      return slot;
    }
    void Add(Slot* slot) {
      if (kIsDebugBuild) {
        Verify();
      }
      DCHECK(slot != nullptr);
      DCHECK(slot->Next() == nullptr);
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        if (kUseTail) {
          DCHECK(*tailp == nullptr);
        }
        *headp = slot;
        if (kUseTail) {
          *tailp = slot;
        }
      } else {
        // List wasn't empty.
        if (kUseTail) {
          DCHECK(*tailp != nullptr);
        }
        *headp = slot;
        slot->SetNext(old_head);
      }
      ++size_;
      if (kIsDebugBuild) {
        Verify();
      }
    }
    // Merge the given list into this list. Empty the given list.
    // Deliberately support only a kUseTail == true SlotFreeList parameter because 1) we don't
    // currently have a situation where we need a kUseTail == false SlotFreeList parameter, and 2)
    // supporting the kUseTail == false parameter would require an O(n) linked list traversal to do
    // the merge if 'this' SlotFreeList has kUseTail == false, which we'd like to avoid.
    void Merge(SlotFreeList<true>* list) {
      if (kIsDebugBuild) {
        Verify();
        CHECK(list != nullptr);
        list->Verify();
      }
      if (list->Size() == 0) {
        return;
      }
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        *headp = list->Head();
        if (kUseTail) {
          *tailp = list->Tail();
        }
        size_ = list->Size();
      } else {
        // List wasn't empty.
        DCHECK(list->Head() != nullptr);
        *headp = list->Head();
        DCHECK(list->Tail() != nullptr);
        list->Tail()->SetNext(old_head);
        // if kUseTail, no change to tailp.
        size_ += list->Size();
      }
      list->Reset();
      if (kIsDebugBuild) {
        Verify();
      }
    }

    void Reset() {
      head_ = 0;
      if (kUseTail) {
        tail_ = 0;
      }
      size_ = 0;
    }

    void Verify() {
      Slot* head = reinterpret_cast<Slot*>(head_);
      Slot* tail = kUseTail ? reinterpret_cast<Slot*>(tail_) : nullptr;
      if (size_ == 0) {
        CHECK(head == nullptr);
        if (kUseTail) {
          CHECK(tail == nullptr);
        }
      } else {
        CHECK(head != nullptr);
        if (kUseTail) {
          CHECK(tail != nullptr);
        }
        size_t count = 0;
        for (Slot* slot = head; slot != nullptr; slot = slot->Next()) {
          ++count;
          if (kUseTail && slot->Next() == nullptr) {
            CHECK_EQ(slot, tail);
          }
        }
        CHECK_EQ(size_, count);
      }
    }

   private:
    // A pointer (Slot*) to the head of the list. Always 8 bytes so that we will have the same
    // layout between 32 bit and 64 bit, which is not strictly necessary, but we do so for 1)
    // uniformity, 2) so that we won't need to change this code if we move to a non-low-4G heap in
    // the future, and 3) because the space savings from using 32-bit fields on 32-bit targets
    // would be lost in the noise (they wouldn't open up enough space for an extra slot).
    uint64_t head_;
    // A pointer (Slot*) to the tail of the list. Always 8 bytes so that we will have the same
    // layout between 32 bit and 64 bit. The tail is stored to speed up merging of lists.
    // Unused if kUseTail is false.
    uint64_t tail_;
    // The number of slots in the list. This is used to make it fast to check if a free list is all
    // free without traversing the whole free list.
    uint32_t size_;
    uint32_t padding_ ATTRIBUTE_UNUSED;
    friend class RosAlloc;
  };
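
  // A hedged usage sketch of SlotFreeList (illustrative only; s0 and s1 stand for
  // Slot* values carved out of a run):
  //
  //   SlotFreeList<true> list, other;
  //   list.Add(s1);             // list: s1        (head == tail == s1)
  //   list.Add(s0);             // list: s0 -> s1  (Add() pushes at the head)
  //   Slot* s = list.Remove();  // returns s0; list: s1
  //   other.Merge(&list);       // O(1): 'list' is spliced in front of 'other' via the
  //                             // tail pointer, and 'list' is reset to empty.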

  // Represents a run of memory slots of the same size.
  //
  // A run's memory layout:
  //
  // +-------------------+
  // | magic_num         |
  // +-------------------+
  // | size_bracket_idx  |
  // +-------------------+
  // | is_thread_local   |
  // +-------------------+
  // | to_be_bulk_freed  |
  // +-------------------+
  // |                   |
  // | free list         |
  // |                   |
  // +-------------------+
  // |                   |
  // | bulk free list    |
  // |                   |
  // +-------------------+
  // |                   |
  // | thread-local free |
  // | list              |
  // |                   |
  // +-------------------+
  // | padding due to    |
  // | alignment         |
  // +-------------------+
  // | slot 0            |
  // +-------------------+
  // | slot 1            |
  // +-------------------+
  // | slot 2            |
  // +-------------------+
  // ...
  // +-------------------+
  // | last slot         |
  // +-------------------+
  //
  class Run {
   public:
    uint8_t magic_num_;                 // The magic number used for debugging.
    uint8_t size_bracket_idx_;          // The index of the size bracket of this run.
    uint8_t is_thread_local_;           // True if this run is used as a thread-local run.
    uint8_t to_be_bulk_freed_;          // Used within BulkFree() to flag a run that's involved with a bulk free.
    uint32_t padding_ ATTRIBUTE_UNUSED;
    // Use a tailless free list for free_list_ so that the alloc fast path does not manage the tail.
    SlotFreeList<false> free_list_;
    SlotFreeList<true> bulk_free_list_;
    SlotFreeList<true> thread_local_free_list_;
    // Padding due to alignment
    // Slot 0
    // Slot 1
    // ...

    // Returns the byte size of the header.
    static size_t fixed_header_size() {
      return sizeof(Run);
    }
    Slot* FirstSlot() const {
      const uint8_t idx = size_bracket_idx_;
      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]);
    }
    Slot* LastSlot() {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      uintptr_t end = reinterpret_cast<uintptr_t>(End());
      Slot* last_slot = reinterpret_cast<Slot*>(end - bracket_size);
      DCHECK_LE(FirstSlot(), last_slot);
      return last_slot;
    }
    SlotFreeList<false>* FreeList() {
      return &free_list_;
    }
    SlotFreeList<true>* BulkFreeList() {
      return &bulk_free_list_;
    }
    SlotFreeList<true>* ThreadLocalFreeList() {
      return &thread_local_free_list_;
    }
    void* End() {
      return reinterpret_cast<uint8_t*>(this) + kPageSize * numOfPages[size_bracket_idx_];
    }
    void SetIsThreadLocal(bool is_thread_local) {
      is_thread_local_ = is_thread_local ? 1 : 0;
    }
    bool IsThreadLocal() const {
      return is_thread_local_ != 0;
    }
    // Set up the free list for a new/empty run.
    void InitFreeList() {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      Slot* first_slot = FirstSlot();
      // Add backwards so the first slot is at the head of the list.
      for (Slot* slot = LastSlot(); slot >= first_slot; slot = slot->Left(bracket_size)) {
        free_list_.Add(slot);
      }
    }
    // Merge the thread-local free list into the free list. Used when a thread-local run becomes
    // full.
    bool MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out);
    // Merge the bulk free list into the free list. Used in a bulk free.
    void MergeBulkFreeListToFreeList();
    // Merge the bulk free list into the thread-local free list. In a bulk free, as a two-step
    // process, GC will first record all the slots to free in a run in the bulk free list, where it
    // can write without a lock, and later acquire a lock once per run to merge the bulk free list
    // into the thread-local free list.
    void MergeBulkFreeListToThreadLocalFreeList();
    // Allocates a slot in a run.
    ALWAYS_INLINE void* AllocSlot();
    // Frees a slot in a run. This is used in a non-bulk free.
    void FreeSlot(void* ptr);
    // Add the given slot to the bulk free list. Returns the bracket size.
    size_t AddToBulkFreeList(void* ptr);
    // Add the given slot to the thread-local free list.
    void AddToThreadLocalFreeList(void* ptr);
    // Returns true if none of the slots in the run are in use.
    bool IsAllFree() const {
      return free_list_.Size() == numOfSlots[size_bracket_idx_];
    }
    // Returns the number of free slots.
    size_t NumberOfFreeSlots() {
      return free_list_.Size();
    }
    // Returns true if all the slots in the run are in use.
    ALWAYS_INLINE bool IsFull();
    // Returns true if the bulk free list is empty.
    bool IsBulkFreeListEmpty() const {
      return bulk_free_list_.Size() == 0;
    }
    // Returns true if the thread-local free list is empty.
    bool IsThreadLocalFreeListEmpty() const {
      return thread_local_free_list_.Size() == 0;
    }
    // Zero the run's data.
    void ZeroData();
    // Zero the run's header and the slot headers.
    void ZeroHeaderAndSlotHeaders();
    // Iterate over all the slots and apply the given function.
    void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
    // Dump the run metadata for debugging.
    std::string Dump();
    // Verify for debugging.
    void Verify(Thread* self, RosAlloc* rosalloc, bool running_on_memory_tool)
        REQUIRES(Locks::mutator_lock_)
        REQUIRES(Locks::thread_list_lock_);

   private:
    // The common part of AddToBulkFreeList() and AddToThreadLocalFreeList(). Returns the bracket
    // size.
    size_t AddToFreeListShared(void* ptr, SlotFreeList<true>* free_list, const char* caller_name);
    // Turns a FreeList into a string for debugging.
    template<bool kUseTail>
    std::string FreeListToStr(SlotFreeList<kUseTail>* free_list);
    // Checks that a given pointer is a valid slot address and returns it as a Slot*.
    Slot* ToSlot(void* ptr) {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
          - reinterpret_cast<uint8_t*>(FirstSlot());
      DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
      size_t slot_idx = offset_from_slot_base / bracket_size;
      DCHECK_LT(slot_idx, numOfSlots[idx]);
      return reinterpret_cast<Slot*>(ptr);
    }
    size_t SlotIndex(Slot* slot) const {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
          - reinterpret_cast<uint8_t*>(FirstSlot());
      DCHECK_EQ(offset_from_slot_base % bracket_size, 0U);
      size_t slot_idx = offset_from_slot_base / bracket_size;
      DCHECK_LT(slot_idx, numOfSlots[idx]);
      return slot_idx;
    }

    // TODO: DISALLOW_COPY_AND_ASSIGN(Run);
  };
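
  // A hedged note on the slot arithmetic above (illustrative): in a run of bracket
  // index idx, slot i begins at
  //
  //   reinterpret_cast<uint8_t*>(run) + headerSizes[idx] + i * bracketSizes[idx]
  //
  // and SlotIndex() inverts this mapping for a given Slot*.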

  // The magic number for a run.
  static constexpr uint8_t kMagicNum = 42;
  // The magic number for free pages.
  static constexpr uint8_t kMagicNumFree = 43;
  // The number of size brackets.
  static constexpr size_t kNumOfSizeBrackets = 42;
  // The sizes (the slot sizes, in bytes) of the size brackets.
  static size_t bracketSizes[kNumOfSizeBrackets];
  // The numbers of pages that are used for runs for each size bracket.
  static size_t numOfPages[kNumOfSizeBrackets];
  // The numbers of slots of the runs for each size bracket.
  static size_t numOfSlots[kNumOfSizeBrackets];
  // The header sizes in bytes of the runs for each size bracket.
  static size_t headerSizes[kNumOfSizeBrackets];

  // Initialize the run specs (the above arrays).
  static void Initialize();
  static bool initialized_;

  // Returns the byte size of the bracket size from the index.
  static size_t IndexToBracketSize(size_t idx) {
    DCHECK_LT(idx, kNumOfSizeBrackets);
    return bracketSizes[idx];
  }
  // Returns the index of the size bracket from the bracket size.
  static size_t BracketSizeToIndex(size_t size) {
    DCHECK(8 <= size &&
           ((size <= kMaxThreadLocalBracketSize && size % kThreadLocalBracketQuantumSize == 0) ||
            (size <= kMaxRegularBracketSize && size % kBracketQuantumSize == 0) ||
            size == 1 * KB || size == 2 * KB));
    size_t idx;
    if (UNLIKELY(size == 1 * KB)) {
      idx = kNumOfSizeBrackets - 2;
    } else if (UNLIKELY(size == 2 * KB)) {
      idx = kNumOfSizeBrackets - 1;
    } else if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      DCHECK_EQ(size % kThreadLocalBracketQuantumSize, 0U);
      idx = size / kThreadLocalBracketQuantumSize - 1;
    } else {
      DCHECK(size <= kMaxRegularBracketSize);
      DCHECK_EQ((size - kMaxThreadLocalBracketSize) % kBracketQuantumSize, 0U);
      idx = ((size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
          + kNumThreadLocalSizeBrackets;
    }
    DCHECK(bracketSizes[idx] == size);
    return idx;
  }
  // Returns true if the given allocation size is for a thread-local allocation.
  static bool IsSizeForThreadLocal(size_t size) {
    bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize;
    DCHECK(size > kLargeSizeThreshold ||
           (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
    return is_size_for_thread_local;
  }
  // Rounds up the size to the nearest bracket size.
  static size_t RoundToBracketSize(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      return RoundUp(size, kThreadLocalBracketQuantumSize);
    } else if (size <= kMaxRegularBracketSize) {
      return RoundUp(size, kBracketQuantumSize);
    } else if (UNLIKELY(size <= 1 * KB)) {
      return 1 * KB;
    } else {
      DCHECK_LE(size, 2 * KB);
      return 2 * KB;
    }
  }
  // Returns the size bracket index from the byte size with rounding.
  static size_t SizeToIndex(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      return RoundUp(size, kThreadLocalBracketQuantumSize) / kThreadLocalBracketQuantumSize - 1;
    } else if (size <= kMaxRegularBracketSize) {
      return (RoundUp(size, kBracketQuantumSize) - kMaxThreadLocalBracketSize) / kBracketQuantumSize
          - 1 + kNumThreadLocalSizeBrackets;
    } else if (size <= 1 * KB) {
      return kNumOfSizeBrackets - 2;
    } else {
      DCHECK_LE(size, 2 * KB);
      return kNumOfSizeBrackets - 1;
    }
  }
  // A combination of SizeToIndex() and RoundToBracketSize().
  static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
    DCHECK(size <= kLargeSizeThreshold);
    size_t idx;
    size_t bracket_size;
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      bracket_size = RoundUp(size, kThreadLocalBracketQuantumSize);
      idx = bracket_size / kThreadLocalBracketQuantumSize - 1;
    } else if (size <= kMaxRegularBracketSize) {
      bracket_size = RoundUp(size, kBracketQuantumSize);
      idx = ((bracket_size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
          + kNumThreadLocalSizeBrackets;
    } else if (size <= 1 * KB) {
      bracket_size = 1 * KB;
      idx = kNumOfSizeBrackets - 2;
    } else {
      DCHECK(size <= 2 * KB);
      bracket_size = 2 * KB;
      idx = kNumOfSizeBrackets - 1;
    }
    DCHECK_EQ(idx, SizeToIndex(size)) << idx;
    DCHECK_EQ(bracket_size, IndexToBracketSize(idx)) << idx;
    DCHECK_EQ(bracket_size, bracketSizes[idx]) << idx;
    DCHECK_LE(size, bracket_size) << idx;
    DCHECK(size > kMaxRegularBracketSize ||
           (size <= kMaxThreadLocalBracketSize &&
            bracket_size - size < kThreadLocalBracketQuantumSize) ||
           (size <= kMaxRegularBracketSize && bracket_size - size < kBracketQuantumSize)) << idx;
    *bracket_size_out = bracket_size;
    return idx;
  }

  // Returns the page map index from an address. Requires that the
  // address is page size aligned.
  size_t ToPageMapIndex(const void* addr) const {
    DCHECK_LE(base_, addr);
    DCHECK_LT(addr, base_ + capacity_);
    size_t byte_offset = reinterpret_cast<const uint8_t*>(addr) - base_;
    DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
    return byte_offset / kPageSize;
  }
  // Returns the page map index from an address, rounding down.
  size_t RoundDownToPageMapIndex(const void* addr) const {
    DCHECK(base_ <= addr && addr < reinterpret_cast<uint8_t*>(base_) + capacity_);
    return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
  }
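
  // A hedged example (illustrative, assuming a 4 KB kPageSize):
  // ToPageMapIndex(base_ + 3 * kPageSize) == 3, and
  // RoundDownToPageMapIndex(base_ + 3 * kPageSize + 123) == 3 as well, since the
  // latter ignores the intra-page offset.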

  // A memory allocation request larger than this size is treated as a large object and allocated
  // at page granularity.
  static const size_t kLargeSizeThreshold = 2048;

  // If true, check that the returned memory is actually zero.
  static constexpr bool kCheckZeroMemory = kIsDebugBuild;
  // Valgrind protects memory, so do not check memory when running under valgrind. In a normal
  // build with kCheckZeroMemory the whole test should be optimized away.
  // TODO: Unprotect before checks.
  ALWAYS_INLINE bool ShouldCheckZeroMemory();

  // If true, log verbose details of operations.
  static constexpr bool kTraceRosAlloc = false;

  struct hash_run {
    size_t operator()(const RosAlloc::Run* r) const {
      return reinterpret_cast<size_t>(r);
    }
  };

  struct eq_run {
    bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
      return r1 == r2;
    }
  };

 public:
  // Different page release modes.
  enum PageReleaseMode {
    kPageReleaseModeNone,         // Release no empty pages.
    kPageReleaseModeEnd,          // Release empty pages at the end of the space.
    kPageReleaseModeSize,         // Release empty pages that are larger than the threshold.
    kPageReleaseModeSizeAndEnd,   // Release empty pages that are larger than the threshold or
                                  // at the end of the space.
    kPageReleaseModeAll,          // Release all empty pages.
  };

  // The default value for page_release_size_threshold_.
  static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;

  // We use thread-local runs for the size brackets whose indexes
  // are less than this index. We use shared (current) runs for the rest.
  // Sync this with the length of Thread::rosalloc_runs_.
  static const size_t kNumThreadLocalSizeBrackets = 16;
  static_assert(kNumThreadLocalSizeBrackets == kNumRosAllocThreadLocalSizeBracketsInThread,
                "Mismatch between kNumThreadLocalSizeBrackets and "
                "kNumRosAllocThreadLocalSizeBracketsInThread");

  // The size of the largest bracket we use thread-local runs for.
  // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
  static const size_t kMaxThreadLocalBracketSize = 128;

  // We use regular (8 or 16-byte increment) runs for the size brackets whose indexes are less than
  // this index.
  static const size_t kNumRegularSizeBrackets = 40;

  // The size of the largest regular (8 or 16-byte increment) bracket. Non-regular brackets are the
  // 1 KB and the 2 KB brackets. This should be equal to bracketSizes[kNumRegularSizeBrackets - 1].
  static const size_t kMaxRegularBracketSize = 512;

  // The bracket size increment for the thread-local brackets (<= kMaxThreadLocalBracketSize bytes).
  static constexpr size_t kThreadLocalBracketQuantumSize = 8;

  // Equal to Log2(kThreadLocalBracketQuantumSize).
  static constexpr size_t kThreadLocalBracketQuantumSizeShift = 3;

  // The bracket size increment for the non-thread-local, regular brackets (of size <=
  // kMaxRegularBracketSize bytes and > kMaxThreadLocalBracketSize bytes).
  static constexpr size_t kBracketQuantumSize = 16;

  // Equal to Log2(kBracketQuantumSize).
  static constexpr size_t kBracketQuantumSizeShift = 4;
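
  // Worked examples of the bracket scheme implied by the constants above (illustrative,
  // assuming the default values in this file):
  //
  //   request size   bracket size   index   notes
  //   1..8           8              0       thread-local, 8-byte quantum
  //   100            104            12      thread-local, 8-byte quantum
  //   129            144            16      regular, 16-byte quantum
  //   513            1024           40      first non-regular bracket
  //   1025           2048           41      last bracket
  //   2049           -              -       > kLargeSizeThreshold: page-level allocation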

 private:
  // The base address of the memory region that's managed by this allocator.
  uint8_t* base_;

  // The footprint in bytes of the currently allocated portion of the
  // memory region.
  size_t footprint_;

  // The maximum footprint. The address, base_ + capacity_, indicates
  // the end of the memory region that's currently managed by this allocator.
  size_t capacity_;

  // The maximum capacity. The address, base_ + max_capacity_, indicates
  // the end of the memory region that's ever managed by this allocator.
  size_t max_capacity_;

  // The run sets that hold the runs whose slots are not all
  // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
  AllocationTrackingSet<Run*, kAllocatorTagRosAlloc> non_full_runs_[kNumOfSizeBrackets];
  // The run sets that hold the runs whose slots are all full. This is
  // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
  std::unordered_set<Run*, hash_run, eq_run, TrackingAllocator<Run*, kAllocatorTagRosAlloc>>
      full_runs_[kNumOfSizeBrackets];
  // The set of free pages.
  AllocationTrackingSet<FreePageRun*, kAllocatorTagRosAlloc> free_page_runs_ GUARDED_BY(lock_);
  // The dedicated full run is always full and is shared by all threads when revoking happens.
  // This is an optimization since it enables us to avoid a null check for revoked runs.
  static Run* dedicated_full_run_;
  // Using size_t to ensure that it is at least word aligned.
  static size_t dedicated_full_run_storage_[];
  // The current runs where the allocations are first attempted for
  // the size brackets that do not use thread-local
  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
  Run* current_runs_[kNumOfSizeBrackets];
  // The mutexes, one per size bracket.
  Mutex* size_bracket_locks_[kNumOfSizeBrackets];
  // Bracket lock names (since locks only have char* names).
  std::string size_bracket_lock_names_[kNumOfSizeBrackets];
  // The types of page map entries.
  enum PageMapKind {
    kPageMapReleased = 0,     // Zero and released back to the OS.
    kPageMapEmpty,            // Zero but probably dirty.
    kPageMapRun,              // The beginning of a run.
    kPageMapRunPart,          // The non-beginning part of a run.
    kPageMapLargeObject,      // The beginning of a large object.
    kPageMapLargeObjectPart,  // The non-beginning part of a large object.
  };
  // The table that indicates what pages are currently used for.
  volatile uint8_t* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
  size_t page_map_size_;
  size_t max_page_map_size_;
  std::unique_ptr<MemMap> page_map_mem_map_;

  // The table that indicates the size of free page runs. These sizes
  // are stored here, rather than in the free page headers, so that the
  // backing pages can be released.
  std::vector<size_t, TrackingAllocator<size_t, kAllocatorTagRosAlloc>> free_page_run_size_map_
      GUARDED_BY(lock_);
  // The global lock. Used to guard the page map, the free page set,
  // and the footprint.
  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
  // The reader-writer lock to allow one bulk free at a time while
  // allowing multiple individual frees at the same time. Also, this
  // is used to avoid race conditions between BulkFree() and
  // RevokeThreadLocalRuns() on the bulk free list.
  ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;

  // The page release mode.
  const PageReleaseMode page_release_mode_;
  // Under kPageReleaseModeSize(AndEnd), if the free page run size is
  // greater than or equal to this value, release pages.
  const size_t page_release_size_threshold_;

  // Whether this allocator is running under Valgrind.
  bool is_running_on_memory_tool_;

  // The base address of the memory region that's managed by this allocator.
  uint8_t* Begin() { return base_; }
  // The end address of the memory region that's managed by this allocator.
  uint8_t* End() { return base_ + capacity_; }

  // Page-granularity alloc/free.
  void* AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type)
      REQUIRES(lock_);
  // Returns how many bytes were freed.
  size_t FreePages(Thread* self, void* ptr, bool already_zero) REQUIRES(lock_);

  // Allocate/free a run slot.
  void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
                     size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  // Allocate/free a run slot without acquiring locks.
  // TODO: REQUIRES(Locks::mutator_lock_)
  void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
                                 size_t* usable_size, size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx) REQUIRES(!lock_);

  // Returns the bracket size.
  size_t FreeFromRun(Thread* self, void* ptr, Run* run)
      REQUIRES(!lock_);

  // Used to allocate a new thread-local run for a size bracket.
  Run* AllocRun(Thread* self, size_t idx) REQUIRES(!lock_);

  // Used to acquire a new/reused run for a size bracket. Used when a
  // thread-local or current run gets full.
  Run* RefillRun(Thread* self, size_t idx) REQUIRES(!lock_);

  // The internals of a non-bulk Free().
  size_t FreeInternal(Thread* self, void* ptr) REQUIRES(!lock_);

  // Allocates large objects.
  void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated,
                         size_t* usable_size, size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);

  // Revoke a run by adding it to non_full_runs_ or freeing the pages.
  void RevokeRun(Thread* self, size_t idx, Run* run) REQUIRES(!lock_);

  // Revoke the current runs which share an index with the thread-local runs.
  void RevokeThreadUnsafeCurrentRuns() REQUIRES(!lock_);

  // Release a range of pages.
  size_t ReleasePageRange(uint8_t* start, uint8_t* end) REQUIRES(lock_);

  // Dumps the page map for debugging.
  std::string DumpPageMap() REQUIRES(lock_);

 public:
  RosAlloc(void* base, size_t capacity, size_t max_capacity,
           PageReleaseMode page_release_mode,
           bool running_on_memory_tool,
           size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
  ~RosAlloc();

  static size_t RunFreeListOffset() {
    return OFFSETOF_MEMBER(Run, free_list_);
  }
  static size_t RunFreeListHeadOffset() {
    return OFFSETOF_MEMBER(SlotFreeList<false>, head_);
  }
  static size_t RunFreeListSizeOffset() {
    return OFFSETOF_MEMBER(SlotFreeList<false>, size_);
  }
  static size_t RunSlotNextOffset() {
    return OFFSETOF_MEMBER(Slot, next_);
  }

  // If kThreadSafe is false then the allocator may avoid acquiring some locks as an optimization.
  // If used, this may cause race conditions if multiple threads are allocating at the same time.
  template<bool kThreadSafe = true>
  void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
              size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  size_t Free(Thread* self, void* ptr)
      REQUIRES(!bulk_free_lock_, !lock_);
  size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
      REQUIRES(!bulk_free_lock_, !lock_);
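
  // A hedged usage sketch (illustrative only; 'rosalloc' and 'self' are assumed to be a
  // RosAlloc* and the calling Thread*, and error handling is omitted):
  //
  //   size_t bytes_allocated, usable_size, bytes_tl_bulk_allocated;
  //   void* p = rosalloc->Alloc<true>(self, 100, &bytes_allocated, &usable_size,
  //                                   &bytes_tl_bulk_allocated);
  //   if (p != nullptr) {
  //     ...  // Use the allocation; usable_size reports the bracket size (104 here).
  //     rosalloc->Free(self, p);
  //   }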

  // Returns true if the given allocation request can be allocated in
  // an existing thread-local run without allocating a new run.
  ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size);
  // Allocate the given allocation request in an existing thread-local
  // run without allocating a new run.
  ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated);

  // Returns the maximum bytes that could be allocated for the given
  // size in bulk, that is the maximum value for the
  // bytes_tl_bulk_allocated out param returned by RosAlloc::Alloc().
  ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size);

  // Returns the size of the allocated slot for a given allocated memory chunk.
  size_t UsableSize(const void* ptr) REQUIRES(!lock_);
  // Returns the size of the allocated slot for a given size.
  size_t UsableSize(size_t bytes) {
    if (UNLIKELY(bytes > kLargeSizeThreshold)) {
      return RoundUp(bytes, kPageSize);
    } else {
      return RoundToBracketSize(bytes);
    }
  }
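
  // For example (illustrative, assuming a 4 KB kPageSize): UsableSize(100) == 104,
  // rounded up to the 8-byte thread-local quantum, while UsableSize(3000) == 4096,
  // since 3000 > kLargeSizeThreshold and is therefore rounded up to a page multiple.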
  // Try to reduce the current footprint by releasing the free page
  // run at the end of the memory region, if any.
  bool Trim() REQUIRES(!lock_);
  // Iterates over all the memory slots and applies the given function.
  void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
                  void* arg)
      REQUIRES(!lock_);

  // Release empty pages.
  size_t ReleasePages() REQUIRES(!lock_);
  // Returns the current footprint.
  size_t Footprint() REQUIRES(!lock_);
  // Returns the current capacity, maximum footprint.
  size_t FootprintLimit() REQUIRES(!lock_);
  // Update the current capacity.
  void SetFootprintLimit(size_t bytes) REQUIRES(!lock_);

  // Releases the thread-local runs assigned to the given thread back to the common set of runs.
  // Returns the total bytes of free slots in the revoked thread-local runs. This is to be
  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
  size_t RevokeThreadLocalRuns(Thread* thread) REQUIRES(!lock_, !bulk_free_lock_);
  // Releases the thread-local runs assigned to all the threads back to the common set of runs.
  // Returns the total bytes of free slots in the revoked thread-local runs. This is to be
  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
  size_t RevokeAllThreadLocalRuns() REQUIRES(!Locks::thread_list_lock_, !lock_, !bulk_free_lock_);
  // Assert the thread-local runs of a thread are revoked.
  void AssertThreadLocalRunsAreRevoked(Thread* thread) REQUIRES(!bulk_free_lock_);
  // Assert all the thread-local runs are revoked.
  void AssertAllThreadLocalRunsAreRevoked() REQUIRES(!Locks::thread_list_lock_, !bulk_free_lock_);

  static Run* GetDedicatedFullRun() {
    return dedicated_full_run_;
  }
  bool IsFreePage(size_t idx) const {
    DCHECK_LT(idx, capacity_ / kPageSize);
    uint8_t pm_type = page_map_[idx];
    return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
  }

  // Callbacks for InspectAll that will count the number of bytes
  // allocated and objects allocated, respectively.
  static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
  static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);

  bool DoesReleaseAllPages() const {
    return page_release_mode_ == kPageReleaseModeAll;
  }

  // Verify for debugging.
  void Verify() REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !bulk_free_lock_,
                         !lock_);

  void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes)
      REQUIRES(!bulk_free_lock_, !lock_);

  void DumpStats(std::ostream& os)
      REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_) REQUIRES(!bulk_free_lock_);

 private:
  friend std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs);

  DISALLOW_COPY_AND_ASSIGN(RosAlloc);
};
std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs);

// Callback from rosalloc when it needs to increase the footprint. Must be implemented somewhere
// else (currently rosalloc_space.cc).
void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment);
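
// A hedged note on the expected semantics (an assumption modeled on sbrk-style morecore
// callbacks; the authoritative behavior is the implementation in rosalloc_space.cc):
// a positive increment grows the footprint of the space backing 'rosalloc', a negative
// increment shrinks it, and the return value identifies the region affected by the
// adjustment (or signals failure).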

}  // namespace allocator
}  // namespace gc
}  // namespace art

#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_