/*
 * Copyright (C) 2013 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "wtf/PartitionAlloc.h"

#include <string.h>

#ifndef NDEBUG
#include <stdio.h>
#endif

// Two partition pages are used as guard / metadata page so make sure the super
// page size is bigger.
COMPILE_ASSERT(WTF::kPartitionPageSize * 4 <= WTF::kSuperPageSize, ok_super_page_size);
COMPILE_ASSERT(!(WTF::kSuperPageSize % WTF::kPartitionPageSize), ok_super_page_multiple);
// Four system pages give us room to hack out a still-guard-paged piece
// of metadata in the middle of a guard partition page.
COMPILE_ASSERT(WTF::kSystemPageSize * 4 <= WTF::kPartitionPageSize, ok_partition_page_size);
COMPILE_ASSERT(!(WTF::kPartitionPageSize % WTF::kSystemPageSize), ok_partition_page_multiple);
COMPILE_ASSERT(sizeof(WTF::PartitionPage) <= WTF::kPageMetadataSize, PartitionPage_not_too_big);
COMPILE_ASSERT(sizeof(WTF::PartitionBucket) <= WTF::kPageMetadataSize, PartitionBucket_not_too_big);
COMPILE_ASSERT(sizeof(WTF::PartitionSuperPageExtentEntry) <= WTF::kPageMetadataSize, PartitionSuperPageExtentEntry_not_too_big);
COMPILE_ASSERT(WTF::kPageMetadataSize * WTF::kNumPartitionPagesPerSuperPage <= WTF::kSystemPageSize, page_metadata_fits_in_hole);
// Check that some of our zanier calculations worked out as expected.
COMPILE_ASSERT(WTF::kGenericSmallestBucket == 8, generic_smallest_bucket);
COMPILE_ASSERT(WTF::kGenericMaxBucketed == 983040, generic_max_bucketed);

namespace WTF {

int PartitionRootBase::gInitializedLock = 0;
bool PartitionRootBase::gInitialized = false;
PartitionPage PartitionRootBase::gSeedPage;
PartitionBucket PartitionRootBase::gPagedBucket;

static size_t partitionBucketNumSystemPages(size_t size)
{
    // This works out reasonably for the current bucket sizes of the generic
    // allocator, and the current values of partition page size and constants.
    // Specifically, we have enough room to always pack the slots perfectly into
    // some number of system pages. The only waste is the waste associated with
    // unfaulted pages (i.e. wasted address space).
    // TODO: we end up using a lot of system pages for very small sizes. For
    // example, we'll use 12 system pages for slot size 24. The slot size is
    // so small that the waste would be tiny with just 4, or 1, system pages.
    // Later, we can investigate whether there are anti-fragmentation benefits
    // to using fewer system pages.
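    // (Worked example for the slot size 24 case above, assuming 4096-byte
    // system pages and 4 system pages per partition page: 12 pages hold
    // 12 * 4096 / 24 == 2048 slots exactly, with no slot waste and no
    // unfaulted pages, so the loop below settles on 12.)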
    double bestWasteRatio = 1.0f;
    size_t bestPages = 0;
    if (size > kMaxSystemPagesPerSlotSpan * kSystemPageSize) {
        ASSERT(!(size % kSystemPageSize));
        return size / kSystemPageSize;
    }
    ASSERT(size <= kMaxSystemPagesPerSlotSpan * kSystemPageSize);
    for (size_t i = kNumSystemPagesPerPartitionPage - 1; i <= kMaxSystemPagesPerSlotSpan; ++i) {
        size_t pageSize = kSystemPageSize * i;
        size_t numSlots = pageSize / size;
        size_t waste = pageSize - (numSlots * size);
        // Leaving a page unfaulted is not free; the page will occupy an empty page table entry.
        // Make a simple attempt to account for that.
        size_t numRemainderPages = i & (kNumSystemPagesPerPartitionPage - 1);
        size_t numUnfaultedPages = numRemainderPages ? (kNumSystemPagesPerPartitionPage - numRemainderPages) : 0;
        waste += sizeof(void*) * numUnfaultedPages;
        double wasteRatio = (double) waste / (double) pageSize;
        if (wasteRatio < bestWasteRatio) {
            bestWasteRatio = wasteRatio;
            bestPages = i;
        }
    }
    ASSERT(bestPages > 0);
    return bestPages;
}

static void partitionAllocBaseInit(PartitionRootBase* root)
{
    ASSERT(!root->initialized);

    spinLockLock(&PartitionRootBase::gInitializedLock);
    if (!PartitionRootBase::gInitialized) {
        PartitionRootBase::gInitialized = true;
        // We mark the seed page as free to make sure it is skipped by our
        // logic to find a new active page.
        PartitionRootBase::gPagedBucket.activePagesHead = &PartitionRootGeneric::gSeedPage;
    }
    spinLockUnlock(&PartitionRootBase::gInitializedLock);

    root->initialized = true;
    root->totalSizeOfCommittedPages = 0;
    root->totalSizeOfSuperPages = 0;
    root->nextSuperPage = 0;
    root->nextPartitionPage = 0;
    root->nextPartitionPageEnd = 0;
    root->firstExtent = 0;
    root->currentExtent = 0;

    memset(&root->globalEmptyPageRing, '\0', sizeof(root->globalEmptyPageRing));
    root->globalEmptyPageRingIndex = 0;

    // This is a "magic" value so we can test if a root pointer is valid.
    root->invertedSelf = ~reinterpret_cast<uintptr_t>(root);
}

static void partitionBucketInitBase(PartitionBucket* bucket, PartitionRootBase* root)
{
    bucket->activePagesHead = &PartitionRootGeneric::gSeedPage;
    bucket->freePagesHead = 0;
    bucket->numFullPages = 0;
    bucket->numSystemPagesPerSlotSpan = partitionBucketNumSystemPages(bucket->slotSize);
}

void partitionAllocInit(PartitionRoot* root, size_t numBuckets, size_t maxAllocation)
{
    partitionAllocBaseInit(root);

    root->numBuckets = numBuckets;
    root->maxAllocation = maxAllocation;
    size_t i;
    for (i = 0; i < root->numBuckets; ++i) {
        PartitionBucket* bucket = &root->buckets()[i];
        if (!i)
            bucket->slotSize = kAllocationGranularity;
        else
            bucket->slotSize = i << kBucketShift;
        partitionBucketInitBase(bucket, root);
    }
}

void partitionAllocGenericInit(PartitionRootGeneric* root)
{
    partitionAllocBaseInit(root);

    root->lock = 0;

    // Precalculate some shift and mask constants used in the hot path.
    // Example: malloc(41) == 101001 binary.
    // The order is 6: (1 << (6 - 1)) == 32 is the highest bit set.
    // orderIndex is the next three MSB == 010 == 2.
    // subOrderIndexMask is a mask for the remaining bits == 11 (masking to 01 for the subOrderIndex).
    size_t order;
    for (order = 0; order <= kBitsPerSizet; ++order) {
        size_t orderIndexShift;
        if (order < kGenericNumBucketsPerOrderBits + 1)
            orderIndexShift = 0;
        else
            orderIndexShift = order - (kGenericNumBucketsPerOrderBits + 1);
        root->orderIndexShifts[order] = orderIndexShift;
        size_t subOrderIndexMask;
        if (order == kBitsPerSizet) {
            // This avoids invoking undefined behavior for an excessive shift.
            subOrderIndexMask = static_cast<size_t>(-1) >> (kGenericNumBucketsPerOrderBits + 1);
        } else {
            subOrderIndexMask = ((static_cast<size_t>(1) << order) - 1) >> (kGenericNumBucketsPerOrderBits + 1);
        }
        root->orderSubIndexMasks[order] = subOrderIndexMask;
    }

    // Set up the actual usable buckets first.
    // Note that typical values (i.e. min allocation size of 8) will result in
    // invalid buckets (size==9 etc. or more generally, size is not a multiple
    // of the smallest allocation granularity).
    // We avoid them in the bucket lookup map, but we tolerate them to keep the
    // code simpler and the structures more generic.
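    // (For example, assuming kGenericSmallestBucket == 8 and 8 buckets per
    // order, the slot sizes run 8, 9, ..., 15, 16, 18, ..., 30, 32, 36, ...
    // with the increment doubling each order; sizes such as 9 or 18 that are
    // not a multiple of 8 are the invalid buckets that get disabled below.)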
    size_t i, j;
    size_t currentSize = kGenericSmallestBucket;
    size_t currentIncrement = kGenericSmallestBucket >> kGenericNumBucketsPerOrderBits;
    PartitionBucket* bucket = &root->buckets[0];
    for (i = 0; i < kGenericNumBucketedOrders; ++i) {
        for (j = 0; j < kGenericNumBucketsPerOrder; ++j) {
            bucket->slotSize = currentSize;
            partitionBucketInitBase(bucket, root);
            // Disable invalid buckets so that touching them faults.
            if (currentSize % kGenericSmallestBucket)
                bucket->activePagesHead = 0;
            currentSize += currentIncrement;
            ++bucket;
        }
        currentIncrement <<= 1;
    }
    ASSERT(currentSize == 1 << kGenericMaxBucketedOrder);
    ASSERT(bucket == &root->buckets[0] + (kGenericNumBucketedOrders * kGenericNumBucketsPerOrder));

    // Then set up the fast size -> bucket lookup table.
    bucket = &root->buckets[0];
    PartitionBucket** bucketPtr = &root->bucketLookups[0];
    for (order = 0; order <= kBitsPerSizet; ++order) {
        for (j = 0; j < kGenericNumBucketsPerOrder; ++j) {
            if (order < kGenericMinBucketedOrder) {
                // Use the bucket of finest granularity for malloc(0) etc.
                *bucketPtr++ = &root->buckets[0];
            } else if (order > kGenericMaxBucketedOrder) {
                *bucketPtr++ = &PartitionRootGeneric::gPagedBucket;
            } else {
                PartitionBucket* validBucket = bucket;
                // Skip over invalid buckets.
                while (validBucket->slotSize % kGenericSmallestBucket)
                    validBucket++;
                *bucketPtr++ = validBucket;
                bucket++;
            }
        }
    }
    ASSERT(bucket == &root->buckets[0] + (kGenericNumBucketedOrders * kGenericNumBucketsPerOrder));
    ASSERT(bucketPtr == &root->bucketLookups[0] + ((kBitsPerSizet + 1) * kGenericNumBucketsPerOrder));
    // And there's one last bucket lookup that will be hit for e.g. malloc(-1),
    // which tries to overflow to a non-existent order.
    *bucketPtr = &PartitionRootGeneric::gPagedBucket;
}

static bool partitionAllocShutdownBucket(PartitionBucket* bucket)
{
    // Failure here indicates a memory leak.
    bool noLeaks = !bucket->numFullPages;
    PartitionPage* page = bucket->activePagesHead;
    while (page) {
        if (page->numAllocatedSlots)
            noLeaks = false;
        page = page->nextPage;
    }

    return noLeaks;
}

static void partitionAllocBaseShutdown(PartitionRootBase* root)
{
    ASSERT(root->initialized);
    root->initialized = false;

    // Now that we've examined all partition pages in all buckets, it's safe
    // to free all our super pages. We first collect the super page pointers
    // on the stack because some of them are themselves stored in super pages.
    char* superPages[kMaxPartitionSize / kSuperPageSize];
    size_t numSuperPages = 0;
    PartitionSuperPageExtentEntry* entry = root->firstExtent;
    while (entry) {
        char* superPage = entry->superPageBase;
        while (superPage != entry->superPagesEnd) {
            superPages[numSuperPages] = superPage;
            numSuperPages++;
            superPage += kSuperPageSize;
        }
        entry = entry->next;
    }
    ASSERT(numSuperPages == root->totalSizeOfSuperPages / kSuperPageSize);
    for (size_t i = 0; i < numSuperPages; ++i)
        freePages(superPages[i], kSuperPageSize);
}

bool partitionAllocShutdown(PartitionRoot* root)
{
    bool noLeaks = true;
    size_t i;
    for (i = 0; i < root->numBuckets; ++i) {
        PartitionBucket* bucket = &root->buckets()[i];
        if (!partitionAllocShutdownBucket(bucket))
            noLeaks = false;
    }

    partitionAllocBaseShutdown(root);
    return noLeaks;
}

bool partitionAllocGenericShutdown(PartitionRootGeneric* root)
{
    bool noLeaks = true;
    size_t i;
    for (i = 0; i < kGenericNumBucketedOrders * kGenericNumBucketsPerOrder; ++i) {
        PartitionBucket* bucket = &root->buckets[i];
        if (!partitionAllocShutdownBucket(bucket))
            noLeaks = false;
    }
    partitionAllocBaseShutdown(root);
    return noLeaks;
}

static NEVER_INLINE void partitionOutOfMemory()
{
    IMMEDIATE_CRASH();
}

static NEVER_INLINE void partitionFull()
{
    IMMEDIATE_CRASH();
}

static ALWAYS_INLINE void partitionDecommitSystemPages(PartitionRootBase* root, void* addr, size_t len)
{
    decommitSystemPages(addr, len);
    ASSERT(root->totalSizeOfCommittedPages > len);
    root->totalSizeOfCommittedPages -= len;
}

static ALWAYS_INLINE void partitionRecommitSystemPages(PartitionRootBase* root, void* addr, size_t len)
{
    recommitSystemPages(addr, len);
    root->totalSizeOfCommittedPages += len;
}

static ALWAYS_INLINE void* partitionAllocPartitionPages(PartitionRootBase* root, int flags, size_t numPartitionPages)
{
    ASSERT(!(reinterpret_cast<uintptr_t>(root->nextPartitionPage) % kPartitionPageSize));
    ASSERT(!(reinterpret_cast<uintptr_t>(root->nextPartitionPageEnd) % kPartitionPageSize));
    RELEASE_ASSERT(numPartitionPages <= kNumPartitionPagesPerSuperPage);
    size_t totalSize = kPartitionPageSize * numPartitionPages;
    root->totalSizeOfCommittedPages += totalSize;
    size_t numPartitionPagesLeft = (root->nextPartitionPageEnd - root->nextPartitionPage) >> kPartitionPageShift;
    if (LIKELY(numPartitionPagesLeft >= numPartitionPages)) {
        // In this case, we can still hand out pages from the current super page
        // allocation.
        char* ret = root->nextPartitionPage;
        root->nextPartitionPage += totalSize;
        return ret;
    }

    // Need a new super page.
    root->totalSizeOfSuperPages += kSuperPageSize;
    if (root->totalSizeOfSuperPages > kMaxPartitionSize)
        partitionFull();
    char* requestedAddress = root->nextSuperPage;
    char* superPage = reinterpret_cast<char*>(allocPages(requestedAddress, kSuperPageSize, kSuperPageSize));
    if (UNLIKELY(!superPage)) {
        if (flags & PartitionAllocReturnNull)
            return 0;
        partitionOutOfMemory();
    }
    root->nextSuperPage = superPage + kSuperPageSize;
    char* ret = superPage + kPartitionPageSize;
    root->nextPartitionPage = ret + totalSize;
    root->nextPartitionPageEnd = root->nextSuperPage - kPartitionPageSize;
    // Make the first partition page in the super page a guard page, but leave a
    // hole in the middle.
    // This is where we put page metadata and also a tiny amount of extent
    // metadata.
    setSystemPagesInaccessible(superPage, kSystemPageSize);
    setSystemPagesInaccessible(superPage + (kSystemPageSize * 2), kPartitionPageSize - (kSystemPageSize * 2));
    // Also make the last partition page a guard page.
    setSystemPagesInaccessible(superPage + (kSuperPageSize - kPartitionPageSize), kPartitionPageSize);
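    // The net layout of a fresh super page is thus: one guard system page,
    // one accessible metadata system page, guard pages for the rest of the
    // first partition page, then the usable partition pages, and finally a
    // whole partition page of guard pages at the end.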

    // If we were after a specific address, but didn't get it, assume that
    // the system chose a lousy address and re-randomize the next
    // allocation.
    if (requestedAddress && requestedAddress != superPage)
        root->nextSuperPage = 0;

    // We allocated a new super page so update super page metadata.
    // First check if this is a new extent or not.
    PartitionSuperPageExtentEntry* latestExtent = reinterpret_cast<PartitionSuperPageExtentEntry*>(partitionSuperPageToMetadataArea(superPage));
    PartitionSuperPageExtentEntry* currentExtent = root->currentExtent;
    bool isNewExtent = (superPage != requestedAddress);
    if (UNLIKELY(isNewExtent)) {
        latestExtent->next = 0;
        if (UNLIKELY(!currentExtent)) {
            root->firstExtent = latestExtent;
        } else {
            ASSERT(currentExtent->superPageBase);
            currentExtent->next = latestExtent;
        }
        root->currentExtent = latestExtent;
        currentExtent = latestExtent;
        currentExtent->superPageBase = superPage;
        currentExtent->superPagesEnd = superPage + kSuperPageSize;
    } else {
        // We allocated next to an existing extent so just nudge the size up a little.
        currentExtent->superPagesEnd += kSuperPageSize;
        ASSERT(ret >= currentExtent->superPageBase && ret < currentExtent->superPagesEnd);
    }
    // By storing the root in every extent metadata object, we have a fast way
    // to go from a pointer within the partition to the root object.
    latestExtent->root = root;

    return ret;
}

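// Decommits the physical memory backing a page's slot span while keeping the
// address space reserved, so the span can be recommitted later when the page
// is pulled off the bucket's free page list.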
static ALWAYS_INLINE void partitionUnusePage(PartitionRootBase* root, PartitionPage* page)
{
    ASSERT(page->bucket->numSystemPagesPerSlotSpan);
    void* addr = partitionPageToPointer(page);
    partitionDecommitSystemPages(root, addr, page->bucket->numSystemPagesPerSlotSpan * kSystemPageSize);
}

static ALWAYS_INLINE size_t partitionBucketSlots(const PartitionBucket* bucket)
{
    return (bucket->numSystemPagesPerSlotSpan * kSystemPageSize) / bucket->slotSize;
}

static ALWAYS_INLINE size_t partitionBucketPartitionPages(const PartitionBucket* bucket)
{
    return (bucket->numSystemPagesPerSlotSpan + (kNumSystemPagesPerPartitionPage - 1)) / kNumSystemPagesPerPartitionPage;
}

static ALWAYS_INLINE void partitionPageReset(PartitionPage* page, PartitionBucket* bucket)
{
    ASSERT(page != &PartitionRootGeneric::gSeedPage);
    page->numAllocatedSlots = 0;
    page->numUnprovisionedSlots = partitionBucketSlots(bucket);
    ASSERT(page->numUnprovisionedSlots);
    page->bucket = bucket;
    page->nextPage = 0;
    // NULLing the freelist is not strictly necessary but it makes an ASSERT in partitionPageFillFreelist simpler.
    page->freelistHead = 0;
    page->pageOffset = 0;
    page->freeCacheIndex = -1;
    size_t numPartitionPages = partitionBucketPartitionPages(bucket);
    size_t i;
    char* pageCharPtr = reinterpret_cast<char*>(page);
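    // If the slot span covers more than one partition page, tag the metadata
    // entries of the trailing partition pages with their offset from this
    // (head) entry, so pointer-to-page metadata lookups can redirect to it.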
    for (i = 1; i < numPartitionPages; ++i) {
        pageCharPtr += kPageMetadataSize;
        PartitionPage* secondaryPage = reinterpret_cast<PartitionPage*>(pageCharPtr);
        secondaryPage->pageOffset = i;
    }
}

static ALWAYS_INLINE char* partitionPageAllocAndFillFreelist(PartitionPage* page)
{
    ASSERT(page != &PartitionRootGeneric::gSeedPage);
    size_t numSlots = page->numUnprovisionedSlots;
    ASSERT(numSlots);
    PartitionBucket* bucket = page->bucket;
    // We should only get here when _every_ slot is either used or unprovisioned.
    // (The third state is "on the freelist". If we have a non-empty freelist, we should not get here.)
    ASSERT(numSlots + page->numAllocatedSlots == partitionBucketSlots(bucket));
    // Similarly, make explicitly sure that the freelist is empty.
    ASSERT(!page->freelistHead);
    ASSERT(page->numAllocatedSlots >= 0);

    size_t size = bucket->slotSize;
    char* base = reinterpret_cast<char*>(partitionPageToPointer(page));
    char* returnObject = base + (size * page->numAllocatedSlots);
    char* firstFreelistPointer = returnObject + size;
    char* firstFreelistPointerExtent = firstFreelistPointer + sizeof(PartitionFreelistEntry*);
    // Our goal is to fault as few system pages as possible. We calculate the
    // page containing the "end" of the returned slot, and then allow freelist
    // pointers to be written up to the end of that page.
    char* subPageLimit = reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(firstFreelistPointer) + kSystemPageOffsetMask) & kSystemPageBaseMask);
    char* slotsLimit = returnObject + (size * page->numUnprovisionedSlots);
    char* freelistLimit = subPageLimit;
    if (UNLIKELY(slotsLimit < freelistLimit))
        freelistLimit = slotsLimit;

    size_t numNewFreelistEntries = 0;
    if (LIKELY(firstFreelistPointerExtent <= freelistLimit)) {
        // Only consider used space in the slot span. If we consider wasted
        // space, we may get an off-by-one when a freelist pointer fits in the
        // wasted space, but a slot does not.
        // We know we can fit at least one freelist pointer.
        numNewFreelistEntries = 1;
        // Any further entries require space for the whole slot span.
        numNewFreelistEntries += (freelistLimit - firstFreelistPointerExtent) / size;
    }

    // We always return an object slot -- that's the +1 below.
    // We do not necessarily create any new freelist entries, because we cross sub page boundaries frequently for large bucket sizes.
    ASSERT(numNewFreelistEntries + 1 <= numSlots);
    numSlots -= (numNewFreelistEntries + 1);
    page->numUnprovisionedSlots = numSlots;
    page->numAllocatedSlots++;

    if (LIKELY(numNewFreelistEntries)) {
        char* freelistPointer = firstFreelistPointer;
        PartitionFreelistEntry* entry = reinterpret_cast<PartitionFreelistEntry*>(freelistPointer);
        page->freelistHead = entry;
        while (--numNewFreelistEntries) {
            freelistPointer += size;
            PartitionFreelistEntry* nextEntry = reinterpret_cast<PartitionFreelistEntry*>(freelistPointer);
            entry->next = partitionFreelistMask(nextEntry);
            entry = nextEntry;
        }
        entry->next = partitionFreelistMask(0);
    } else {
        page->freelistHead = 0;
    }
    return returnObject;
}

// This helper function scans the active page list for a suitable new active
// page, starting at the passed in page.
// When it finds a suitable new active page (one that has free slots), it is
// set as the new active page and true is returned. If there is no suitable new
// active page, false is returned and the current active page is set to null.
// As potential pages are scanned, they are tidied up according to their state.
// Freed pages are swept on to the free page list and full pages are unlinked
// from any list.
static ALWAYS_INLINE bool partitionSetNewActivePage(PartitionPage* page)
{
    if (page == &PartitionRootBase::gSeedPage) {
        ASSERT(!page->nextPage);
        return false;
    }

    PartitionPage* nextPage = 0;
    PartitionBucket* bucket = page->bucket;

    for (; page; page = nextPage) {
        nextPage = page->nextPage;
        ASSERT(page->bucket == bucket);
        ASSERT(page != bucket->freePagesHead);
        ASSERT(!bucket->freePagesHead || page != bucket->freePagesHead->nextPage);

        // Page is usable if it has something on the freelist, or unprovisioned
        // slots that can be turned into a freelist.
        if (LIKELY(page->freelistHead != 0) || LIKELY(page->numUnprovisionedSlots)) {
            bucket->activePagesHead = page;
            return true;
        }

        ASSERT(page->numAllocatedSlots >= 0);
        if (LIKELY(page->numAllocatedSlots == 0)) {
            ASSERT(page->freeCacheIndex == -1);
            // We hit a free page, so shepherd it on to the free page list.
            page->nextPage = bucket->freePagesHead;
            bucket->freePagesHead = page;
        } else {
            // If we get here, we found a full page. Skip over it too, and also
            // tag it as full (via a negative value). We need it tagged so that
            // free'ing can tell, and move it back into the active page list.
            ASSERT(page->numAllocatedSlots == static_cast<int>(partitionBucketSlots(bucket)));
            page->numAllocatedSlots = -page->numAllocatedSlots;
            ++bucket->numFullPages;
            // numFullPages is a uint16_t for efficient packing so guard against
            // overflow to be safe.
            RELEASE_ASSERT(bucket->numFullPages);
            // Not necessary but might help stop accidents.
            page->nextPage = 0;
        }
    }

    bucket->activePagesHead = 0;
    return false;
}

struct PartitionDirectMapExtent {
    size_t mapSize; // Mapped size, not including guard pages and meta-data.
};

static ALWAYS_INLINE PartitionDirectMapExtent* partitionPageToDirectMapExtent(PartitionPage* page)
{
    ASSERT(partitionBucketIsDirectMapped(page->bucket));
    return reinterpret_cast<PartitionDirectMapExtent*>(reinterpret_cast<char*>(page) + 2 * kPageMetadataSize);
}

static ALWAYS_INLINE void* partitionDirectMap(PartitionRootBase* root, int flags, size_t size)
{
    size = partitionDirectMapSize(size);

    // Because we need to fake looking like a super page, we need to allocate
    // a bunch of system pages more than "size":
    // - The first few system pages are the partition page in which the super
    // page metadata is stored. We fault just one system page out of a partition
    // page sized clump.
    // - We add a trailing guard page.
    size_t mapSize = size + kPartitionPageSize + kSystemPageSize;
    // Round up to the allocation granularity.
    mapSize += kPageAllocationGranularityOffsetMask;
    mapSize &= kPageAllocationGranularityBaseMask;

    // TODO: we may want to let the operating system place these allocations
    // where it pleases. On 32-bit, this might limit address space
    // fragmentation and on 64-bit, this might have useful savings for TLB
    // and page table overhead.
    // TODO: if upsizing realloc()s are common on large sizes, we could
    // consider over-allocating address space on 64-bit, "just in case".
    // TODO: consider pre-populating page tables (e.g. MAP_POPULATE on Linux,
    // MADV_WILLNEED on POSIX).
    // TODO: these pages will be zero-filled. Consider internalizing an
    // allocZeroed() API so we can avoid a memset() entirely in this case.
    char* ptr = reinterpret_cast<char*>(allocPages(0, mapSize, kSuperPageSize));
    if (!ptr) {
        if (flags & PartitionAllocReturnNull)
            return 0;
        partitionOutOfMemory();
    }
    char* ret = ptr + kPartitionPageSize;
    // TODO: due to all the guard paging, this arrangement creates 4 mappings.
    // We could get it down to three by using read-only for the metadata page,
    // or perhaps two by leaving out the trailing guard page on 64-bit.
    setSystemPagesInaccessible(ptr, kSystemPageSize);
    setSystemPagesInaccessible(ptr + (kSystemPageSize * 2), kPartitionPageSize - (kSystemPageSize * 2));
    setSystemPagesInaccessible(ret + size, kSystemPageSize);
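    // The resulting mapping is: one guard system page, one accessible
    // metadata system page, guard pages for the rest of the first partition
    // page, then "size" accessible bytes, then a trailing guard system page.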

    PartitionSuperPageExtentEntry* extent = reinterpret_cast<PartitionSuperPageExtentEntry*>(partitionSuperPageToMetadataArea(ptr));
    extent->root = root;
    PartitionPage* page = partitionPointerToPageNoAlignmentCheck(ret);
    PartitionBucket* bucket = reinterpret_cast<PartitionBucket*>(reinterpret_cast<char*>(page) + kPageMetadataSize);
    page->freelistHead = 0;
    page->nextPage = 0;
    page->bucket = bucket;
    page->numAllocatedSlots = 1;
    page->numUnprovisionedSlots = 0;
    page->pageOffset = 0;
    page->freeCacheIndex = 0;

    bucket->activePagesHead = 0;
    bucket->freePagesHead = 0;
    bucket->slotSize = size;
    bucket->numSystemPagesPerSlotSpan = 0;
    bucket->numFullPages = 0;

    PartitionDirectMapExtent* mapExtent = partitionPageToDirectMapExtent(page);
    mapExtent->mapSize = mapSize - kPartitionPageSize - kSystemPageSize;

    return ret;
}

static ALWAYS_INLINE void partitionDirectUnmap(PartitionPage* page)
{
    size_t unmapSize = partitionPageToDirectMapExtent(page)->mapSize;

    // Add on the size of the trailing guard page and preceding partition
    // page.
    unmapSize += kPartitionPageSize + kSystemPageSize;

    ASSERT(!(unmapSize & kPageAllocationGranularityOffsetMask));

    char* ptr = reinterpret_cast<char*>(partitionPageToPointer(page));
    // Account for the mapping starting a partition page before the actual
    // allocation address.
    ptr -= kPartitionPageSize;

    freePages(ptr, unmapSize);
}

void* partitionAllocSlowPath(PartitionRootBase* root, int flags, size_t size, PartitionBucket* bucket)
{
    // The slow path is called when the freelist is empty.
    ASSERT(!bucket->activePagesHead->freelistHead);

    // For the partitionAllocGeneric API, we have a bunch of buckets marked
    // as special cases. We bounce them through to the slow path so that we
    // can still have a blazing fast hot path due to lack of corner-case
    // branches.
    bool returnNull = flags & PartitionAllocReturnNull;
    if (UNLIKELY(partitionBucketIsDirectMapped(bucket))) {
        ASSERT(size > kGenericMaxBucketed);
        ASSERT(bucket == &PartitionRootBase::gPagedBucket);
        if (size > kGenericMaxDirectMapped) {
            if (returnNull)
                return 0;
            RELEASE_ASSERT(false);
        }
        return partitionDirectMap(root, flags, size);
    }

    // First, look for a usable page in the existing active pages list.
    // Change active page, accepting the current page as a candidate.
    if (LIKELY(partitionSetNewActivePage(bucket->activePagesHead))) {
        PartitionPage* newPage = bucket->activePagesHead;
        if (LIKELY(newPage->freelistHead != 0)) {
            PartitionFreelistEntry* ret = newPage->freelistHead;
            newPage->freelistHead = partitionFreelistMask(ret->next);
            newPage->numAllocatedSlots++;
            return ret;
        }
        ASSERT(newPage->numUnprovisionedSlots);
        return partitionPageAllocAndFillFreelist(newPage);
    }

    // Second, look in our list of freed but reserved pages.
    PartitionPage* newPage = bucket->freePagesHead;
    if (LIKELY(newPage != 0)) {
        ASSERT(newPage != &PartitionRootGeneric::gSeedPage);
        ASSERT(!newPage->freelistHead);
        ASSERT(!newPage->numAllocatedSlots);
        ASSERT(!newPage->numUnprovisionedSlots);
        ASSERT(newPage->freeCacheIndex == -1);
        bucket->freePagesHead = newPage->nextPage;
        void* addr = partitionPageToPointer(newPage);
        partitionRecommitSystemPages(root, addr, newPage->bucket->numSystemPagesPerSlotSpan * kSystemPageSize);
    } else {
        // Third. If we get here, we need a brand new page.
        size_t numPartitionPages = partitionBucketPartitionPages(bucket);
        void* rawNewPage = partitionAllocPartitionPages(root, flags, numPartitionPages);
        if (UNLIKELY(!rawNewPage)) {
            ASSERT(returnNull);
            return 0;
        }
        // Skip the alignment check because it depends on page->bucket, which is not yet set.
        newPage = partitionPointerToPageNoAlignmentCheck(rawNewPage);
    }

    partitionPageReset(newPage, bucket);
    bucket->activePagesHead = newPage;
    return partitionPageAllocAndFillFreelist(newPage);
}

static ALWAYS_INLINE void partitionFreePage(PartitionRootBase* root, PartitionPage* page)
{
    ASSERT(page->freelistHead);
    ASSERT(!page->numAllocatedSlots);
    partitionUnusePage(root, page);
    // We actually leave the freed page in the active list. We'll sweep it on
    // to the free page list when we next walk the active page list. Pulling
    // this trick enables us to use a singly-linked page list for all cases,
    // which is critical in keeping the page metadata structure down to 32
    // bytes in size.
    page->freelistHead = 0;
    page->numUnprovisionedSlots = 0;
}

static ALWAYS_INLINE void partitionRegisterEmptyPage(PartitionPage* page)
{
    PartitionRootBase* root = partitionPageToRoot(page);

    // If the page is already registered as empty, give it another life.
    if (page->freeCacheIndex != -1) {
        ASSERT(page->freeCacheIndex >= 0);
        ASSERT(static_cast<unsigned>(page->freeCacheIndex) < kMaxFreeableSpans);
        ASSERT(root->globalEmptyPageRing[page->freeCacheIndex] == page);
        root->globalEmptyPageRing[page->freeCacheIndex] = 0;
    }

    size_t currentIndex = root->globalEmptyPageRingIndex;
    PartitionPage* pageToFree = root->globalEmptyPageRing[currentIndex];
    // The page might well have been re-activated, filled up, etc. before we get
    // around to looking at it here.
    if (pageToFree) {
        ASSERT(pageToFree != &PartitionRootBase::gSeedPage);
        ASSERT(pageToFree->freeCacheIndex >= 0);
        ASSERT(static_cast<unsigned>(pageToFree->freeCacheIndex) < kMaxFreeableSpans);
        ASSERT(pageToFree == root->globalEmptyPageRing[pageToFree->freeCacheIndex]);
        if (!pageToFree->numAllocatedSlots && pageToFree->freelistHead) {
            // The page is still empty, and not freed, so _really_ free it.
            partitionFreePage(root, pageToFree);
        }
        pageToFree->freeCacheIndex = -1;
    }

    // We put the empty slot span on our global list of "pages that were once
    // empty", thus providing it a bit of breathing room to get re-used before
    // we really free it. This improves performance, particularly on Mac OS X
    // which has subpar memory management performance.
    root->globalEmptyPageRing[currentIndex] = page;
    page->freeCacheIndex = currentIndex;
    ++currentIndex;
    if (currentIndex == kMaxFreeableSpans)
        currentIndex = 0;
    root->globalEmptyPageRingIndex = currentIndex;
}

void partitionFreeSlowPath(PartitionPage* page)
{
    PartitionBucket* bucket = page->bucket;
    ASSERT(page != &PartitionRootGeneric::gSeedPage);
    ASSERT(bucket->activePagesHead != &PartitionRootGeneric::gSeedPage);
    if (LIKELY(page->numAllocatedSlots == 0)) {
        // Page became fully unused.
        if (UNLIKELY(partitionBucketIsDirectMapped(bucket))) {
            partitionDirectUnmap(page);
            return;
        }
        // If it's the current active page, attempt to change it. We'd prefer to leave
        // the page empty as a gentle force towards defragmentation.
        if (LIKELY(page == bucket->activePagesHead) && page->nextPage) {
            if (partitionSetNewActivePage(page->nextPage)) {
                ASSERT(bucket->activePagesHead != page);
                // Link the empty page back in after the new current page, to
                // avoid losing a reference to it.
                // TODO: consider walking the list to link the empty page after
                // all non-empty pages?
                PartitionPage* currentPage = bucket->activePagesHead;
                page->nextPage = currentPage->nextPage;
                currentPage->nextPage = page;
            } else {
                bucket->activePagesHead = page;
                page->nextPage = 0;
            }
        }
        partitionRegisterEmptyPage(page);
    } else {
        // Ensure that the page is full. That's the only valid case if we
        // arrive here.
        ASSERT(page->numAllocatedSlots < 0);
        // A transition of numAllocatedSlots from 0 to -1 is not legal, and
        // likely indicates a double-free.
        RELEASE_ASSERT(page->numAllocatedSlots != -1);
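        // The inline free path decremented numAllocatedSlots before calling
        // here, so a full page holding N slots arrives tagged as -(N + 1);
        // negating and subtracting 2 restores the correct count of N - 1
        // (checked by the ASSERT below).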
        page->numAllocatedSlots = -page->numAllocatedSlots - 2;
        ASSERT(page->numAllocatedSlots == static_cast<int>(partitionBucketSlots(bucket) - 1));
        // Fully used page became partially used. It must be put back on the
        // non-full page list. Also make it the current page to increase the
        // chances of it being filled up again. The old current page will be
        // the next page.
        page->nextPage = bucket->activePagesHead;
        bucket->activePagesHead = page;
        --bucket->numFullPages;
        // Special case: for a partition page with just a single slot, it may
        // now be empty and we want to run it through the empty logic.
        if (UNLIKELY(page->numAllocatedSlots == 0))
            partitionFreeSlowPath(page);
    }
}

bool partitionReallocDirectMappedInPlace(PartitionRootGeneric* root, PartitionPage* page, size_t newSize)
{
    ASSERT(partitionBucketIsDirectMapped(page->bucket));

    newSize = partitionCookieSizeAdjustAdd(newSize);

    // Note that the new size might be a bucketed size; this function is called
    // whenever we're reallocating a direct mapped allocation.
    newSize = partitionDirectMapSize(newSize);
    if (newSize < kGenericMinDirectMappedDownsize)
        return false;

    // bucket->slotSize is the current size of the allocation.
    size_t currentSize = page->bucket->slotSize;
    if (newSize == currentSize)
        return true;

    char* charPtr = static_cast<char*>(partitionPageToPointer(page));

    if (newSize < currentSize) {
        size_t mapSize = partitionPageToDirectMapExtent(page)->mapSize;

        // Don't reallocate in-place if new size is less than 80 % of the full
        // map size, to avoid holding on to too much unused address space.
        if ((newSize / kSystemPageSize) * 5 < (mapSize / kSystemPageSize) * 4)
            return false;

        // Shrink by decommitting unneeded pages and making them inaccessible.
        size_t decommitSize = currentSize - newSize;
        partitionDecommitSystemPages(root, charPtr + newSize, decommitSize);
        setSystemPagesInaccessible(charPtr + newSize, decommitSize);
    } else if (newSize <= partitionPageToDirectMapExtent(page)->mapSize) {
        // Grow within the actually allocated memory. Just need to make the
        // pages accessible again.
        size_t recommitSize = newSize - currentSize;
        setSystemPagesAccessible(charPtr + currentSize, recommitSize);
        partitionRecommitSystemPages(root, charPtr + currentSize, recommitSize);

#if ENABLE(ASSERT)
        memset(charPtr + currentSize, kUninitializedByte, recommitSize);
#endif
    } else {
        // We can't perform the realloc in-place.
        // TODO: support this too when possible.
        return false;
    }

#if ENABLE(ASSERT)
    // Write a new trailing cookie.
    partitionCookieWriteValue(charPtr + newSize - kCookieSize);
#endif

    page->bucket->slotSize = newSize;
    return true;
}

void* partitionReallocGeneric(PartitionRootGeneric* root, void* ptr, size_t newSize)
{
#if defined(MEMORY_TOOL_REPLACES_ALLOCATOR)
    return realloc(ptr, newSize);
#else
    if (UNLIKELY(!ptr))
        return partitionAllocGeneric(root, newSize);
    if (UNLIKELY(!newSize)) {
        partitionFreeGeneric(root, ptr);
        return 0;
    }

    RELEASE_ASSERT(newSize <= kGenericMaxDirectMapped);

    ASSERT(partitionPointerIsValid(partitionCookieFreePointerAdjust(ptr)));

    PartitionPage* page = partitionPointerToPage(partitionCookieFreePointerAdjust(ptr));

    if (UNLIKELY(partitionBucketIsDirectMapped(page->bucket))) {
        // We may be able to perform the realloc in place by changing the
        // accessibility of memory pages and, if reducing the size, decommitting
        // them.
        if (partitionReallocDirectMappedInPlace(root, page, newSize))
            return ptr;
    }

    size_t actualNewSize = partitionAllocActualSize(root, newSize);
    size_t actualOldSize = partitionAllocGetSize(ptr);

    // TODO: note that tcmalloc will "ignore" a downsizing realloc() unless the
    // new size is a significant percentage smaller. We could do the same if we
    // determine it is a win.
    if (actualNewSize == actualOldSize) {
        // Trying to allocate a block of size newSize would give us a block of
        // the same size as the one we've already got, so no point in doing
        // anything here.
        return ptr;
    }

    // This realloc cannot be resized in-place. Sadness.
    void* ret = partitionAllocGeneric(root, newSize);
    size_t copySize = actualOldSize;
    if (newSize < copySize)
        copySize = newSize;

    memcpy(ret, ptr, copySize);
    partitionFreeGeneric(root, ptr);
    return ret;
#endif
}

#ifndef NDEBUG

void partitionDumpStats(const PartitionRoot& root)
{
    size_t i;
    size_t totalLive = 0;
    size_t totalResident = 0;
    size_t totalFreeable = 0;
    for (i = 0; i < root.numBuckets; ++i) {
        const PartitionBucket& bucket = root.buckets()[i];
        if (bucket.activePagesHead == &PartitionRootGeneric::gSeedPage && !bucket.freePagesHead && !bucket.numFullPages) {
            // Empty bucket with no freelist or full pages. Skip reporting it.
            continue;
        }
        size_t numFreePages = 0;
        PartitionPage* freePages = bucket.freePagesHead;
        while (freePages) {
            ++numFreePages;
            freePages = freePages->nextPage;
        }
        size_t bucketSlotSize = bucket.slotSize;
        size_t bucketNumSlots = partitionBucketSlots(&bucket);
        size_t bucketUsefulStorage = bucketSlotSize * bucketNumSlots;
        size_t bucketPageSize = bucket.numSystemPagesPerSlotSpan * kSystemPageSize;
        size_t bucketWaste = bucketPageSize - bucketUsefulStorage;
        size_t numActiveBytes = bucket.numFullPages * bucketUsefulStorage;
        size_t numResidentBytes = bucket.numFullPages * bucketPageSize;
        size_t numFreeableBytes = 0;
        size_t numActivePages = 0;
        const PartitionPage* page = bucket.activePagesHead;
        while (page) {
            ASSERT(page != &PartitionRootGeneric::gSeedPage);
            // A page may be on the active list but freed and not yet swept.
            if (!page->freelistHead && !page->numUnprovisionedSlots && !page->numAllocatedSlots) {
                ++numFreePages;
            } else {
                ++numActivePages;
                numActiveBytes += (page->numAllocatedSlots * bucketSlotSize);
                size_t pageBytesResident = (bucketNumSlots - page->numUnprovisionedSlots) * bucketSlotSize;
                // Round up to system page size.
                pageBytesResident = (pageBytesResident + kSystemPageOffsetMask) & kSystemPageBaseMask;
                numResidentBytes += pageBytesResident;
                if (!page->numAllocatedSlots)
                    numFreeableBytes += pageBytesResident;
            }
            page = page->nextPage;
        }
        totalLive += numActiveBytes;
        totalResident += numResidentBytes;
        totalFreeable += numFreeableBytes;
        printf("bucket size %zu (pageSize %zu waste %zu): %zu alloc/%zu commit/%zu freeable bytes, %zu/%zu/%zu full/active/free pages\n", bucketSlotSize, bucketPageSize, bucketWaste, numActiveBytes, numResidentBytes, numFreeableBytes, static_cast<size_t>(bucket.numFullPages), numActivePages, numFreePages);
    }
    printf("total live: %zu bytes\n", totalLive);
    printf("total resident: %zu bytes\n", totalResident);
    printf("total freeable: %zu bytes\n", totalFreeable);
    fflush(stdout);
}

#endif // !NDEBUG

} // namespace WTF