sanitizer_allocator.h revision ce17384b74b0dda2e246ce1dedf29b5d46df9c60
1//===-- sanitizer_allocator.h -----------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Specialized memory allocator for ThreadSanitizer, MemorySanitizer, etc.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef SANITIZER_ALLOCATOR_H
15#define SANITIZER_ALLOCATOR_H
16
17#include "sanitizer_internal_defs.h"
18#include "sanitizer_common.h"
19#include "sanitizer_libc.h"
20#include "sanitizer_list.h"
21#include "sanitizer_mutex.h"
22
23namespace __sanitizer {
24
25// SizeClassMap maps allocation sizes into size classes and back.
26// Class 0 corresponds to size 0.
27// Classes 1 - 16 correspond to sizes 8 - 128 (size = class_id * 8).
28// Next 8 classes: 128 + i * 16 (i = 1 to 8).
29// Next 8 classes: 256 + i * 32 (i = 1 to 8).
30// ...
31// Next 8 classes: 2^k + i * 2^(k-3) (i = 1 to 8).
32// Last class corresponds to kMaxSize = 1 << kMaxSizeLog.
33//
34// This structure of the size class map gives us:
35//   - Efficient table-free class-to-size and size-to-class functions.
36//   - Difference between two consecutive size classes is between 6% and 12%.
37//
38// This class also gives a hint to a thread-caching allocator about the number
39// of chunks that should be cached per thread:
40//  - kMaxNumCached is the maximal number of chunks per size class.
41//  - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
42//
43// Part of output of SizeClassMap::Print():
44//    c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
45//    c01 => s: 8 diff: +8 00% l 3 cached: 256 2048; id 1
46//    c02 => s: 16 diff: +8 100% l 4 cached: 256 4096; id 2
47//    ...
48//    c07 => s: 56 diff: +8 16% l 5 cached: 256 14336; id 7
49//
50//    c08 => s: 64 diff: +8 14% l 6 cached: 256 16384; id 8
51//    ...
52//    c15 => s: 120 diff: +8 07% l 6 cached: 256 30720; id 15
53//
54//    c16 => s: 128 diff: +8 06% l 7 cached: 256 32768; id 16
55//    c17 => s: 144 diff: +16 12% l 7 cached: 227 32688; id 17
56//    ...
57//    c23 => s: 240 diff: +16 07% l 7 cached: 136 32640; id 23
58//
59//    c24 => s: 256 diff: +16 06% l 8 cached: 128 32768; id 24
60//    c25 => s: 288 diff: +32 12% l 8 cached: 113 32544; id 25
61//    ...
62//    c31 => s: 480 diff: +32 07% l 8 cached: 68 32640; id 31
63//
64//    c32 => s: 512 diff: +32 06% l 9 cached: 64 32768; id 32
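//
// Illustrative arithmetic (a sketch; these values follow from kMinSizeLog == 3
// and kMidSizeLog == 7 and hold for both typedefs defined below):
//   ClassID(17)  == 3,  Size(3)  == 24   (sizes 17..24   share class 3)
//   ClassID(129) == 17, Size(17) == 144  (sizes 129..144 share class 17)
// In general, Size(ClassID(s)) >= s for every s in [1, kMaxSize].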
65
66
67template <uptr kMaxSizeLog, uptr kMaxNumCached, uptr kMaxBytesCachedLog>
68class SizeClassMap {
69  static const uptr kMinSizeLog = 3;
70  static const uptr kMidSizeLog = kMinSizeLog + 4;
71  static const uptr kMinSize = 1 << kMinSizeLog;
72  static const uptr kMidSize = 1 << kMidSizeLog;
73  static const uptr kMidClass = kMidSize / kMinSize;
74  static const uptr S = 3;
75  static const uptr M = (1 << S) - 1;
76
77 public:
78  static const uptr kMaxSize = 1 << kMaxSizeLog;
79  static const uptr kNumClasses =
80      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
81  COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
82  static const uptr kNumClassesRounded =
83      kNumClasses == 32  ? 32 :
84      kNumClasses <= 64  ? 64 :
85      kNumClasses <= 128 ? 128 : 256;
86
87  static uptr Size(uptr class_id) {
88    if (class_id <= kMidClass)
89      return kMinSize * class_id;
90    class_id -= kMidClass;
91    uptr t = kMidSize << (class_id >> S);
92    return t + (t >> S) * (class_id & M);
93  }
94
95  static uptr ClassID(uptr size) {
96    if (size <= kMidSize)
97      return (size + kMinSize - 1) >> kMinSizeLog;
98    if (size > kMaxSize) return 0;
99    uptr l = SANITIZER_WORDSIZE - 1 - __builtin_clzl(size);
100    uptr hbits = (size >> (l - S)) & M;
101    uptr lbits = size & ((1 << (l - S)) - 1);
102    uptr l1 = l - kMidSizeLog;
103    return kMidClass + (l1 << S) + hbits + (lbits > 0);
104  }
105
106  static uptr MaxCached(uptr class_id) {
107    if (class_id == 0) return 0;
108    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
109    return Max(1UL, Min(kMaxNumCached, n));
110  }
111
112  static void Print() {
113    uptr prev_s = 0;
114    uptr total_cached = 0;
115    for (uptr i = 0; i < kNumClasses; i++) {
116      uptr s = Size(i);
117      if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
118        Printf("\n");
119      uptr d = s - prev_s;
120      uptr p = prev_s ? (d * 100 / prev_s) : 0;
121      uptr l = SANITIZER_WORDSIZE - 1 - __builtin_clzl(s);
122      uptr cached = MaxCached(i) * s;
123      Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
124             "cached: %zd %zd; id %zd\n",
125             i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
126      total_cached += cached;
127      prev_s = s;
128    }
129    Printf("Total cached: %zd\n", total_cached);
130  }
131
132  static void Validate() {
133    for (uptr c = 1; c < kNumClasses; c++) {
134      // Printf("Validate: c%zd\n", c);
135      uptr s = Size(c);
136      CHECK_EQ(ClassID(s), c);
137      if (c != kNumClasses - 1)
138        CHECK_EQ(ClassID(s + 1), c + 1);
139      CHECK_EQ(ClassID(s - 1), c);
140      if (c)
141        CHECK_GT(Size(c), Size(c-1));
142    }
143    CHECK_EQ(ClassID(kMaxSize + 1), 0);
144
145    for (uptr s = 1; s <= kMaxSize; s++) {
146      uptr c = ClassID(s);
147      // Printf("s%zd => c%zd\n", s, c);
148      CHECK_LT(c, kNumClasses);
149      CHECK_GE(Size(c), s);
150      if (c > 0)
151        CHECK_LT(Size(c-1), s);
152    }
153  }
154};
155
156typedef SizeClassMap<15, 256, 16> DefaultSizeClassMap;
157typedef SizeClassMap<15, 64, 14> CompactSizeClassMap;
158
159
160struct AllocatorListNode {
161  AllocatorListNode *next;
162};
163
164typedef IntrusiveList<AllocatorListNode> AllocatorFreeList;
165
166// Move at most max_count chunks from allocate_from to allocate_to.
167// This function would be better as a method of AllocatorFreeList, but we can't
168// inherit it from IntrusiveList because ancient gcc complains about non-PODness.
169static inline uptr BulkMove(uptr max_count,
170                            AllocatorFreeList *allocate_from,
171                            AllocatorFreeList *allocate_to) {
172  CHECK(!allocate_from->empty());
173  CHECK(allocate_to->empty());
174  uptr res = 0;
175  if (allocate_from->size() <= max_count) {
176    res = allocate_from->size();
177    allocate_to->append_front(allocate_from);
178    CHECK(allocate_from->empty());
179  } else {
180    for (uptr i = 0; i < max_count; i++) {
181      AllocatorListNode *node = allocate_from->front();
182      allocate_from->pop_front();
183      allocate_to->push_front(node);
184    }
185    res = max_count;
186    CHECK(!allocate_from->empty());
187  }
188  CHECK(!allocate_to->empty());
189  return res;
190}
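// Illustrative usage (a sketch only; the lists and the count 16 are
// hypothetical):
//   AllocatorFreeList from;  // e.g. a region's free list, already populated
//   AllocatorFreeList to;    // e.g. a per-thread cache list, must be empty
//   to.clear();
//   uptr moved = BulkMove(16, &from, &to);  // moved == Min(16UL, from's size)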
191
192// Allocators call these callbacks on mmap/munmap.
193struct NoOpMapUnmapCallback {
194  void OnMap(uptr p, uptr size) const { }
195  void OnUnmap(uptr p, uptr size) const { }
196};
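// A tool can plug in its own callback to observe the allocator's mmap/munmap
// traffic, e.g. to update shadow memory or statistics. A minimal sketch
// (RecordMmap/RecordMunmap are hypothetical hooks, not part of this header):
//   struct StatsMapUnmapCallback {
//     void OnMap(uptr p, uptr size) const { RecordMmap(p, size); }
//     void OnUnmap(uptr p, uptr size) const { RecordMunmap(p, size); }
//   };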
197
198// SizeClassAllocator64 -- allocator for 64-bit address space.
199//
200// Space: a portion of address space of kSpaceSize bytes starting at
201// a fixed address (kSpaceBeg). Both constants are powers of two and
202// kSpaceBeg is kSpaceSize-aligned.
203// At the beginning the entire space is mprotect-ed, then small parts of it
204// are mapped on demand.
205//
206// Region: a part of Space dedicated to a single size class.
207// There are kNumClasses Regions of equal size.
208//
209// UserChunk: a piece of memory returned to user.
210// MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
211//
212// A Region looks like this:
213// UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
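//
// Illustrative instantiation (a sketch; the address and sizes below are
// examples only, real tools choose platform-specific values):
//   typedef SizeClassAllocator64<0x700000000000ULL /*kSpaceBeg*/,
//                                0x010000000000ULL /*kSpaceSize*/,
//                                16 /*kMetadataSize*/,
//                                DefaultSizeClassMap> PrimaryAllocator64;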
214template <const uptr kSpaceBeg, const uptr kSpaceSize,
215          const uptr kMetadataSize, class SizeClassMap,
216          class MapUnmapCallback = NoOpMapUnmapCallback>
217class SizeClassAllocator64 {
218 public:
219  void Init() {
220    CHECK_EQ(kSpaceBeg,
221             reinterpret_cast<uptr>(Mprotect(kSpaceBeg, kSpaceSize)));
222    MapWithCallback(kSpaceEnd, AdditionalSize());
223  }
224
225  void MapWithCallback(uptr beg, uptr size) {
226    CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
227    MapUnmapCallback().OnMap(beg, size);
228  }
229
230  void UnmapWithCallback(uptr beg, uptr size) {
231    MapUnmapCallback().OnUnmap(beg, size);
232    UnmapOrDie(reinterpret_cast<void *>(beg), size);
233  }
234
235  static bool CanAllocate(uptr size, uptr alignment) {
236    return size <= SizeClassMap::kMaxSize &&
237      alignment <= SizeClassMap::kMaxSize;
238  }
239
240  void *Allocate(uptr size, uptr alignment) {
241    if (size < alignment) size = alignment;
242    CHECK(CanAllocate(size, alignment));
243    return AllocateBySizeClass(ClassID(size));
244  }
245
246  void Deallocate(void *p) {
247    CHECK(PointerIsMine(p));
248    DeallocateBySizeClass(p, GetSizeClass(p));
249  }
250
251  // Allocate several chunks of the given class_id.
252  void BulkAllocate(uptr class_id, AllocatorFreeList *free_list) {
253    CHECK_LT(class_id, kNumClasses);
254    RegionInfo *region = GetRegionInfo(class_id);
255    SpinMutexLock l(&region->mutex);
256    if (region->free_list.empty()) {
257      PopulateFreeList(class_id, region);
258    }
259    region->n_allocated += BulkMove(SizeClassMap::MaxCached(class_id),
260                                    &region->free_list, free_list);
261  }
262
263  // Swallow the entire free_list for the given class_id.
264  void BulkDeallocate(uptr class_id, AllocatorFreeList *free_list) {
265    CHECK_LT(class_id, kNumClasses);
266    RegionInfo *region = GetRegionInfo(class_id);
267    SpinMutexLock l(&region->mutex);
268    region->n_freed += free_list->size();
269    region->free_list.append_front(free_list);
270  }
271
272  static bool PointerIsMine(void *p) {
273    return reinterpret_cast<uptr>(p) / kSpaceSize == kSpaceBeg / kSpaceSize;
274  }
275
276  static uptr GetSizeClass(void *p) {
277    return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClassesRounded;
278  }
279
280  void *GetBlockBegin(void *p) {
281    uptr class_id = GetSizeClass(p);
282    uptr size = SizeClassMap::Size(class_id);
283    uptr chunk_idx = GetChunkIdx((uptr)p, size);
284    uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
285    uptr beg = chunk_idx * size;
286    uptr next_beg = beg + size;
287    RegionInfo *region = GetRegionInfo(class_id);
288    if (region->mapped_user >= next_beg)
289      return reinterpret_cast<void*>(reg_beg + beg);
290    return 0;
291  }
292
293  static uptr GetActuallyAllocatedSize(void *p) {
294    CHECK(PointerIsMine(p));
295    return SizeClassMap::Size(GetSizeClass(p));
296  }
297
298  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
299
300  void *GetMetaData(void *p) {
301    uptr class_id = GetSizeClass(p);
302    uptr size = SizeClassMap::Size(class_id);
303    uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
304    return reinterpret_cast<void*>(kSpaceBeg + (kRegionSize * (class_id + 1)) -
305                                   (1 + chunk_idx) * kMetadataSize);
306  }
307
308  uptr TotalMemoryUsed() {
309    uptr res = 0;
310    for (uptr i = 0; i < kNumClasses; i++)
311      res += GetRegionInfo(i)->allocated_user;
312    return res;
313  }
314
315  // Test-only.
316  void TestOnlyUnmap() {
317    UnmapWithCallback(kSpaceBeg, kSpaceSize + AdditionalSize());
318  }
319
320  void PrintStats() {
321    uptr total_mapped = 0;
322    uptr n_allocated = 0;
323    uptr n_freed = 0;
324    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
325      RegionInfo *region = GetRegionInfo(class_id);
326      total_mapped += region->mapped_user;
327      n_allocated += region->n_allocated;
328      n_freed += region->n_freed;
329    }
330    Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
331           "remains %zd\n",
332           total_mapped >> 20, n_allocated, n_allocated - n_freed);
333    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
334      RegionInfo *region = GetRegionInfo(class_id);
335      if (region->mapped_user == 0) continue;
336      Printf("  %02zd (%zd): total: %zd K allocs: %zd remains: %zd\n",
337             class_id,
338             SizeClassMap::Size(class_id),
339             region->mapped_user >> 10,
340             region->n_allocated,
341             region->n_allocated - region->n_freed);
342    }
343  }
344
345  typedef SizeClassMap SizeClassMapT;
346  static const uptr kNumClasses = SizeClassMap::kNumClasses;
347  static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;
348
349 private:
350  static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
351  static const uptr kSpaceEnd = kSpaceBeg + kSpaceSize;
352  COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
353  // kRegionSize must be >= 2^32.
354  COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
355  // Populate the free list with at most this number of bytes at once
356  // or with one element if its size is greater.
357  static const uptr kPopulateSize = 1 << 15;
358  // Call mmap for user memory with at least this size.
359  static const uptr kUserMapSize = 1 << 15;
360  // Call mmap for metadata memory with at least this size.
361  static const uptr kMetaMapSize = 1 << 16;
362
363  struct RegionInfo {
364    SpinMutex mutex;
365    AllocatorFreeList free_list;
366    uptr allocated_user;  // Bytes allocated for user memory.
367    uptr allocated_meta;  // Bytes allocated for metadata.
368    uptr mapped_user;  // Bytes mapped for user memory.
369    uptr mapped_meta;  // Bytes mapped for metadata.
370    uptr n_allocated, n_freed;  // Just stats.
371  };
372  COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);
373
374  static uptr AdditionalSize() {
375    return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
376                     GetPageSizeCached());
377  }
378
379  RegionInfo *GetRegionInfo(uptr class_id) {
380    CHECK_LT(class_id, kNumClasses);
381    RegionInfo *regions = reinterpret_cast<RegionInfo*>(kSpaceBeg + kSpaceSize);
382    return &regions[class_id];
383  }
384
385  static uptr GetChunkIdx(uptr chunk, uptr size) {
386    u32 offset = chunk % kRegionSize;
387    // Here we divide by a non-constant. This is costly.
388    // We require that kRegionSize is at least 2^32 so that offset is 32-bit.
389    // We save 2x by using 32-bit div, but may need to use a 256-way switch.
390    return offset / (u32)size;
391  }
392
393  void PopulateFreeList(uptr class_id, RegionInfo *region) {
394    CHECK(region->free_list.empty());
395    uptr size = SizeClassMap::Size(class_id);
396    uptr beg_idx = region->allocated_user;
397    uptr end_idx = beg_idx + kPopulateSize;
398    uptr region_beg = kSpaceBeg + kRegionSize * class_id;
399    if (end_idx + size > region->mapped_user) {
400      // Do the mmap for the user memory.
401      uptr map_size = kUserMapSize;
402      while (end_idx + size > region->mapped_user + map_size)
403        map_size += kUserMapSize;
404      CHECK_GE(region->mapped_user + map_size, end_idx);
405      MapWithCallback(region_beg + region->mapped_user, map_size);
406      region->mapped_user += map_size;
407    }
408    uptr idx = beg_idx;
409    uptr i = 0;
410    do {  // do-while loop because we need to put at least one item.
411      uptr p = region_beg + idx;
412      region->free_list.push_front(reinterpret_cast<AllocatorListNode*>(p));
413      idx += size;
414      i++;
415    } while (idx < end_idx);
416    region->allocated_user += idx - beg_idx;
417    CHECK_LE(region->allocated_user, region->mapped_user);
418    region->allocated_meta += i * kMetadataSize;
419    if (region->allocated_meta > region->mapped_meta) {
420      uptr map_size = kMetaMapSize;
421      while (region->allocated_meta > region->mapped_meta + map_size)
422        map_size += kMetaMapSize;
423      // Do the mmap for the metadata.
424      CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
425      MapWithCallback(region_beg + kRegionSize -
426                      region->mapped_meta - map_size, map_size);
427      region->mapped_meta += map_size;
428    }
429    CHECK_LE(region->allocated_meta, region->mapped_meta);
430    if (region->allocated_user + region->allocated_meta > kRegionSize) {
431      Printf("Out of memory. Dying.\n");
432      Printf("The process has exhausted %zuMB for size class %zu.\n",
433          kRegionSize / 1024 / 1024, size);
434      Die();
435    }
436  }
437
438  void *AllocateBySizeClass(uptr class_id) {
439    CHECK_LT(class_id, kNumClasses);
440    RegionInfo *region = GetRegionInfo(class_id);
441    SpinMutexLock l(&region->mutex);
442    if (region->free_list.empty()) {
443      PopulateFreeList(class_id, region);
444    }
445    CHECK(!region->free_list.empty());
446    AllocatorListNode *node = region->free_list.front();
447    region->free_list.pop_front();
448    region->n_allocated++;
449    return reinterpret_cast<void*>(node);
450  }
451
452  void DeallocateBySizeClass(void *p, uptr class_id) {
453    RegionInfo *region = GetRegionInfo(class_id);
454    SpinMutexLock l(&region->mutex);
455    region->free_list.push_front(reinterpret_cast<AllocatorListNode*>(p));
456    region->n_freed++;
457  }
458};
459
460// SizeClassAllocator32 -- allocator for 32-bit address space.
461// This allocator can theoretically be used on 64-bit arch, but there it is less
462// efficient than SizeClassAllocator64.
463//
464// [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
465// be returned by MmapOrDie().
466//
467// Region:
468//   a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
469// Since the regions are aligned by kRegionSize, there are exactly
470// kNumPossibleRegions possible regions in the address space and so we keep
471// a u8 array possible_regions[kNumPossibleRegions] to store the size classes.
472// A size class of 0 means the region is not used by the allocator.
473//
474// One Region is used to allocate chunks of a single size class.
475// A Region looks like this:
476// UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
477//
478// To avoid false sharing, objects of this class should be
479// cache-line aligned.
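//
// Illustrative instantiation (a sketch; the parameters are examples only):
//   typedef SizeClassAllocator32<0 /*kSpaceBeg*/,
//                                1ULL << 32 /*kSpaceSize*/,
//                                16 /*kMetadataSize*/,
//                                CompactSizeClassMap> PrimaryAllocator32;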
480template <const uptr kSpaceBeg, const u64 kSpaceSize,
481          const uptr kMetadataSize, class SizeClassMap,
482          class MapUnmapCallback = NoOpMapUnmapCallback>
483class SizeClassAllocator32 {
484 public:
485  void Init() {
486    state_ = reinterpret_cast<State *>(MapWithCallback(sizeof(State)));
487  }
488
489  void *MapWithCallback(uptr size) {
490    size = RoundUpTo(size, GetPageSizeCached());
491    void *res = MmapOrDie(size, "SizeClassAllocator32");
492    MapUnmapCallback().OnMap((uptr)res, size);
493    return res;
494  }
495  void UnmapWithCallback(uptr beg, uptr size) {
496    MapUnmapCallback().OnUnmap(beg, size);
497    UnmapOrDie(reinterpret_cast<void *>(beg), size);
498  }
499
500  static bool CanAllocate(uptr size, uptr alignment) {
501    return size <= SizeClassMap::kMaxSize &&
502      alignment <= SizeClassMap::kMaxSize;
503  }
504
505  void *Allocate(uptr size, uptr alignment) {
506    if (size < alignment) size = alignment;
507    CHECK(CanAllocate(size, alignment));
508    return AllocateBySizeClass(ClassID(size));
509  }
510
511  void Deallocate(void *p) {
512    CHECK(PointerIsMine(p));
513    DeallocateBySizeClass(p, GetSizeClass(p));
514  }
515
516  void *GetMetaData(void *p) {
517    CHECK(PointerIsMine(p));
518    uptr mem = reinterpret_cast<uptr>(p);
519    uptr beg = ComputeRegionBeg(mem);
520    uptr size = SizeClassMap::Size(GetSizeClass(p));
521    u32 offset = mem - beg;
522    uptr n = offset / (u32)size;  // 32-bit division
523    uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
524    return reinterpret_cast<void*>(meta);
525  }
526
527  // Allocate several chunks of the given class_id.
528  void BulkAllocate(uptr class_id, AllocatorFreeList *free_list) {
529    SizeClassInfo *sci = GetSizeClassInfo(class_id);
530    SpinMutexLock l(&sci->mutex);
531    EnsureSizeClassHasAvailableChunks(sci, class_id);
532    CHECK(!sci->free_list.empty());
533    BulkMove(SizeClassMap::MaxCached(class_id), &sci->free_list, free_list);
534  }
535
536  // Swallow the entire free_list for the given class_id.
537  void BulkDeallocate(uptr class_id, AllocatorFreeList *free_list) {
538    SizeClassInfo *sci = GetSizeClassInfo(class_id);
539    SpinMutexLock l(&sci->mutex);
540    sci->free_list.append_front(free_list);
541  }
542
543  bool PointerIsMine(void *p) {
544    return GetSizeClass(p) != 0;
545  }
546
547  uptr GetSizeClass(void *p) {
548    return state_->possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
549  }
550
551  void *GetBlockBegin(void *p) {
552    CHECK(PointerIsMine(p));
553    uptr mem = reinterpret_cast<uptr>(p);
554    uptr beg = ComputeRegionBeg(mem);
555    uptr size = SizeClassMap::Size(GetSizeClass(p));
556    u32 offset = mem - beg;
557    u32 n = offset / (u32)size;  // 32-bit division
558    uptr res = beg + (n * (u32)size);
559    return reinterpret_cast<void*>(res);
560  }
561
562  uptr GetActuallyAllocatedSize(void *p) {
563    CHECK(PointerIsMine(p));
564    return SizeClassMap::Size(GetSizeClass(p));
565  }
566
567  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
568
569  uptr TotalMemoryUsed() {
570    // No need to lock here.
571    uptr res = 0;
572    for (uptr i = 0; i < kNumPossibleRegions; i++)
573      if (state_->possible_regions[i])
574        res += kRegionSize;
575    return res;
576  }
577
578  void TestOnlyUnmap() {
579    for (uptr i = 0; i < kNumPossibleRegions; i++)
580      if (state_->possible_regions[i])
581        UnmapWithCallback((i * kRegionSize), kRegionSize);
582    UnmapWithCallback(reinterpret_cast<uptr>(state_), sizeof(State));
583  }
584
585  void PrintStats() {
586  }
587
588  typedef SizeClassMap SizeClassMapT;
589  static const uptr kNumClasses = SizeClassMap::kNumClasses;
590
591 private:
592  static const uptr kRegionSizeLog = SANITIZER_WORDSIZE == 64 ? 24 : 20;
593  static const uptr kRegionSize = 1 << kRegionSizeLog;
594  static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;
595
596  struct SizeClassInfo {
597    SpinMutex mutex;
598    AllocatorFreeList free_list;
599    char padding[kCacheLineSize - sizeof(uptr) - sizeof(AllocatorFreeList)];
600  };
601  COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);
602
603  uptr ComputeRegionId(uptr mem) {
604    uptr res = mem >> kRegionSizeLog;
605    CHECK_LT(res, kNumPossibleRegions);
606    return res;
607  }
608
609  uptr ComputeRegionBeg(uptr mem) {
610    return mem & ~(kRegionSize - 1);
611  }
612
613  uptr AllocateRegion(uptr class_id) {
614    CHECK_LT(class_id, kNumClasses);
615    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
616                                      "SizeClassAllocator32"));
617    MapUnmapCallback().OnMap(res, kRegionSize);
618    CHECK_EQ(0U, (res & (kRegionSize - 1)));
619    CHECK_EQ(0U, state_->possible_regions[ComputeRegionId(res)]);
620    state_->possible_regions[ComputeRegionId(res)] = class_id;
621    return res;
622  }
623
624  SizeClassInfo *GetSizeClassInfo(uptr class_id) {
625    CHECK_LT(class_id, kNumClasses);
626    return &state_->size_class_info_array[class_id];
627  }
628
629  void EnsureSizeClassHasAvailableChunks(SizeClassInfo *sci, uptr class_id) {
630    if (!sci->free_list.empty()) return;
631    uptr size = SizeClassMap::Size(class_id);
632    uptr reg = AllocateRegion(class_id);
633    uptr n_chunks = kRegionSize / (size + kMetadataSize);
634    for (uptr i = reg; i < reg + n_chunks * size; i += size)
635      sci->free_list.push_back(reinterpret_cast<AllocatorListNode*>(i));
636  }
637
638  void *AllocateBySizeClass(uptr class_id) {
639    CHECK_LT(class_id, kNumClasses);
640    SizeClassInfo *sci = GetSizeClassInfo(class_id);
641    SpinMutexLock l(&sci->mutex);
642    EnsureSizeClassHasAvailableChunks(sci, class_id);
643    CHECK(!sci->free_list.empty());
644    AllocatorListNode *node = sci->free_list.front();
645    sci->free_list.pop_front();
646    return reinterpret_cast<void*>(node);
647  }
648
649  void DeallocateBySizeClass(void *p, uptr class_id) {
650    CHECK_LT(class_id, kNumClasses);
651    SizeClassInfo *sci = GetSizeClassInfo(class_id);
652    SpinMutexLock l(&sci->mutex);
653    sci->free_list.push_front(reinterpret_cast<AllocatorListNode*>(p));
654  }
655
656  struct State {
657    u8 possible_regions[kNumPossibleRegions];
658    SizeClassInfo size_class_info_array[kNumClasses];
659  };
660  State *state_;
661};
662
663// Objects of this type should be used as local caches for SizeClassAllocator64
664// or SizeClassAllocator32. Since the typical use of this class is to have one
665// object per thread in TLS, it has to be POD.
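// Illustrative usage (a sketch; Primary, primary_allocator and cache are
// hypothetical names, and THREADLOCAL denotes the platform's TLS storage
// qualifier):
//   typedef SizeClassAllocator64<...> Primary;          // parameters elided
//   static Primary primary_allocator;                   // process-wide
//   static THREADLOCAL SizeClassAllocatorLocalCache<Primary> cache;
//   ...
//   void *p = cache.Allocate(&primary_allocator, primary_allocator.ClassID(size));
//   cache.Deallocate(&primary_allocator, primary_allocator.GetSizeClass(p), p);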
666template<class SizeClassAllocator>
667struct SizeClassAllocatorLocalCache {
668  typedef SizeClassAllocator Allocator;
669  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;
670  // Don't need to call Init if the object is a global (i.e. zero-initialized).
671  void Init() {
672    internal_memset(this, 0, sizeof(*this));
673  }
674
675  void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
676    CHECK_NE(class_id, 0UL);
677    CHECK_LT(class_id, kNumClasses);
678    AllocatorFreeList *free_list = &free_lists_[class_id];
679    if (free_list->empty())
680      allocator->BulkAllocate(class_id, free_list);
681    CHECK(!free_list->empty());
682    void *res = free_list->front();
683    free_list->pop_front();
684    return res;
685  }
686
687  void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
688    CHECK_NE(class_id, 0UL);
689    CHECK_LT(class_id, kNumClasses);
690    AllocatorFreeList *free_list = &free_lists_[class_id];
691    free_list->push_front(reinterpret_cast<AllocatorListNode*>(p));
692    if (free_list->size() >= 2 * SizeClassMap::MaxCached(class_id))
693      DrainHalf(allocator, class_id);
694  }
695
696  void Drain(SizeClassAllocator *allocator) {
697    for (uptr i = 0; i < kNumClasses; i++) {
698      allocator->BulkDeallocate(i, &free_lists_[i]);
699      CHECK(free_lists_[i].empty());
700    }
701  }
702
703  // private:
704  typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
705  AllocatorFreeList free_lists_[kNumClasses];
706
707  void DrainHalf(SizeClassAllocator *allocator, uptr class_id) {
708    AllocatorFreeList *free_list = &free_lists_[class_id];
709    AllocatorFreeList half;
710    half.clear();
711    const uptr count = free_list->size() / 2;
712    for (uptr i = 0; i < count; i++) {
713      AllocatorListNode *node = free_list->front();
714      free_list->pop_front();
715      half.push_front(node);
716    }
717    allocator->BulkDeallocate(class_id, &half);
718  }
719};
720
721// This class can (de)allocate only large chunks of memory using mmap/unmap.
722// The main purpose of this allocator is to cover large and rare allocation
723// sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
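//
// The mapping backing one allocation looks like this (derived from Allocate()
// below; the <pad> part appears only when alignment > page_size_):
//   [ map_beg | <pad> | Header page (Header, then metadata) | user data | round-up ]
// The Header always occupies the page immediately below the pointer returned
// to the user, which is why GetHeader() simply subtracts page_size_, and
// GetMetaData() returns the bytes right after the Header within that page.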
724template <class MapUnmapCallback = NoOpMapUnmapCallback>
725class LargeMmapAllocator {
726 public:
727  void Init() {
728    internal_memset(this, 0, sizeof(*this));
729    page_size_ = GetPageSizeCached();
730  }
731
732  void *Allocate(uptr size, uptr alignment) {
733    CHECK(IsPowerOfTwo(alignment));
734    uptr map_size = RoundUpMapSize(size);
735    if (alignment > page_size_)
736      map_size += alignment;
737    if (map_size < size) return 0;  // Overflow.
738    uptr map_beg = reinterpret_cast<uptr>(
739        MmapOrDie(map_size, "LargeMmapAllocator"));
740    MapUnmapCallback().OnMap(map_beg, map_size);
741    uptr map_end = map_beg + map_size;
742    uptr res = map_beg + page_size_;
743    if (res & (alignment - 1))  // Align.
744      res += alignment - (res & (alignment - 1));
745    CHECK_EQ(0, res & (alignment - 1));
746    CHECK_LE(res + size, map_end);
747    Header *h = GetHeader(res);
748    h->size = size;
749    h->map_beg = map_beg;
750    h->map_size = map_size;
751    uptr size_log = SANITIZER_WORDSIZE - __builtin_clzl(map_size) - 1;
752    CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));
753    {
754      SpinMutexLock l(&mutex_);
755      uptr idx = n_chunks_++;
756      CHECK_LT(idx, kMaxNumChunks);
757      h->chunk_idx = idx;
758      chunks_[idx] = h;
759      stats.n_allocs++;
760      stats.currently_allocated += map_size;
761      stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
762      stats.by_size_log[size_log]++;
763    }
764    return reinterpret_cast<void*>(res);
765  }
766
767  void Deallocate(void *p) {
768    Header *h = GetHeader(p);
769    {
770      SpinMutexLock l(&mutex_);
771      uptr idx = h->chunk_idx;
772      CHECK_EQ(chunks_[idx], h);
773      CHECK_LT(idx, n_chunks_);
774      chunks_[idx] = chunks_[n_chunks_ - 1];
775      chunks_[idx]->chunk_idx = idx;
776      n_chunks_--;
777      stats.n_frees++;
778      stats.currently_allocated -= h->map_size;
779    }
780    MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
781    UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
782  }
783
784  uptr TotalMemoryUsed() {
785    SpinMutexLock l(&mutex_);
786    uptr res = 0;
787    for (uptr i = 0; i < n_chunks_; i++) {
788      Header *h = chunks_[i];
789      CHECK_EQ(h->chunk_idx, i);
790      res += RoundUpMapSize(h->size);
791    }
792    return res;
793  }
794
795  bool PointerIsMine(void *p) {
796    return GetBlockBegin(p) != 0;
797  }
798
799  uptr GetActuallyAllocatedSize(void *p) {
800    return RoundUpTo(GetHeader(p)->size, page_size_);
801  }
802
803  // At least page_size_/2 metadata bytes are available.
804  void *GetMetaData(void *p) {
805    // Too slow: CHECK_EQ(p, GetBlockBegin(p));
806    CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));
807    return GetHeader(p) + 1;
808  }
809
810  void *GetBlockBegin(void *ptr) {
811    uptr p = reinterpret_cast<uptr>(ptr);
812    SpinMutexLock l(&mutex_);
813    uptr nearest_chunk = 0;
814    // Cache-friendly linear search.
815    for (uptr i = 0; i < n_chunks_; i++) {
816      uptr ch = reinterpret_cast<uptr>(chunks_[i]);
817      if (p < ch) continue;  // p is to the left of this chunk, skip it.
818      if (p - ch < p - nearest_chunk)
819        nearest_chunk = ch;
820    }
821    if (!nearest_chunk)
822      return 0;
823    Header *h = reinterpret_cast<Header *>(nearest_chunk);
824    CHECK_GE(nearest_chunk, h->map_beg);
825    CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
826    CHECK_LE(nearest_chunk, p);
827    if (h->map_beg + h->map_size < p)
828      return 0;
829    return GetUser(h);
830  }
831
832  void PrintStats() {
833    Printf("Stats: LargeMmapAllocator: allocated %zd times, "
834           "remains %zd (%zd K) max %zd M; by size logs: ",
835           stats.n_allocs, stats.n_allocs - stats.n_frees,
836           stats.currently_allocated >> 10, stats.max_allocated >> 20);
837    for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
838      uptr c = stats.by_size_log[i];
839      if (!c) continue;
840      Printf("%zd:%zd; ", i, c);
841    }
842    Printf("\n");
843  }
844
845 private:
846  static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);
847  struct Header {
848    uptr map_beg;
849    uptr map_size;
850    uptr size;
851    uptr chunk_idx;
852  };
853
854  Header *GetHeader(uptr p) {
855    CHECK_EQ(p % page_size_, 0);
856    return reinterpret_cast<Header*>(p - page_size_);
857  }
858  Header *GetHeader(void *p) { return GetHeader(reinterpret_cast<uptr>(p)); }
859
860  void *GetUser(Header *h) {
861    CHECK_EQ((uptr)h % page_size_, 0);
862    return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
863  }
864
865  uptr RoundUpMapSize(uptr size) {
866    return RoundUpTo(size, page_size_) + page_size_;
867  }
868
869  uptr page_size_;
870  Header *chunks_[kMaxNumChunks];
871  uptr n_chunks_;
872  struct Stats {
873    uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
874  } stats;
875  SpinMutex mutex_;
876};
877
878// This class implements a complete memory allocator by using two
879// internal allocators:
880// PrimaryAllocator is efficient, but may not allocate some sizes (alignments).
881//  When allocating 2^x bytes it should return a 2^x-aligned chunk.
882// PrimaryAllocator is used via a local AllocatorCache.
883// SecondaryAllocator can allocate anything, but is not efficient.
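//
// Typical composition (a sketch; the typedef names are illustrative and
// kSpaceBeg/kSpaceSize stand for tool-chosen constants):
//   typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, 16,
//                                DefaultSizeClassMap> Primary;
//   typedef SizeClassAllocatorLocalCache<Primary> Cache;
//   typedef LargeMmapAllocator<> Secondary;
//   typedef CombinedAllocator<Primary, Cache, Secondary> Allocator;
//   ...
//   // 'allocator' is a global Allocator, 'thread_cache' a per-thread Cache.
//   void *p = allocator.Allocate(&thread_cache, size, alignment);
//   allocator.Deallocate(&thread_cache, p);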
884template <class PrimaryAllocator, class AllocatorCache,
885          class SecondaryAllocator>  // NOLINT
886class CombinedAllocator {
887 public:
888  void Init() {
889    primary_.Init();
890    secondary_.Init();
891  }
892
893  void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
894                 bool cleared = false) {
895    // Returning 0 on malloc(0) may break a lot of code.
896    if (size == 0)
897      size = 1;
898    if (size + alignment < size)
899      return 0;
900    if (alignment > 8)
901      size = RoundUpTo(size, alignment);
902    void *res;
903    if (primary_.CanAllocate(size, alignment))
904      res = cache->Allocate(&primary_, primary_.ClassID(size));
905    else
906      res = secondary_.Allocate(size, alignment);
907    if (alignment > 8)
908      CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
909    if (cleared && res)
910      internal_memset(res, 0, size);
911    return res;
912  }
913
914  void Deallocate(AllocatorCache *cache, void *p) {
915    if (!p) return;
916    if (primary_.PointerIsMine(p))
917      cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);
918    else
919      secondary_.Deallocate(p);
920  }
921
922  void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,
923                   uptr alignment) {
924    if (!p)
925      return Allocate(cache, new_size, alignment);
926    if (!new_size) {
927      Deallocate(cache, p);
928      return 0;
929    }
930    CHECK(PointerIsMine(p));
931    uptr old_size = GetActuallyAllocatedSize(p);
932    uptr memcpy_size = Min(new_size, old_size);
933    void *new_p = Allocate(cache, new_size, alignment);
934    if (new_p)
935      internal_memcpy(new_p, p, memcpy_size);
936    Deallocate(cache, p);
937    return new_p;
938  }
939
940  bool PointerIsMine(void *p) {
941    if (primary_.PointerIsMine(p))
942      return true;
943    return secondary_.PointerIsMine(p);
944  }
945
946  bool FromPrimary(void *p) {
947    return primary_.PointerIsMine(p);
948  }
949
950  void *GetMetaData(void *p) {
951    if (primary_.PointerIsMine(p))
952      return primary_.GetMetaData(p);
953    return secondary_.GetMetaData(p);
954  }
955
956  void *GetBlockBegin(void *p) {
957    if (primary_.PointerIsMine(p))
958      return primary_.GetBlockBegin(p);
959    return secondary_.GetBlockBegin(p);
960  }
961
962  uptr GetActuallyAllocatedSize(void *p) {
963    if (primary_.PointerIsMine(p))
964      return primary_.GetActuallyAllocatedSize(p);
965    return secondary_.GetActuallyAllocatedSize(p);
966  }
967
968  uptr TotalMemoryUsed() {
969    return primary_.TotalMemoryUsed() + secondary_.TotalMemoryUsed();
970  }
971
972  void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }
973
974  void SwallowCache(AllocatorCache *cache) {
975    cache->Drain(&primary_);
976  }
977
978  void PrintStats() {
979    primary_.PrintStats();
980    secondary_.PrintStats();
981  }
982
983 private:
984  PrimaryAllocator primary_;
985  SecondaryAllocator secondary_;
986};
987
988}  // namespace __sanitizer
989
990#endif  // SANITIZER_ALLOCATOR_H
991
992