1#include "Python.h"
2
3#include <stdbool.h>
4
5
6/* Defined in tracemalloc.c */
7extern void _PyMem_DumpTraceback(int fd, const void *ptr);
8
9
10/* Python's malloc wrappers (see pymem.h) */
11
12#undef  uint
13#define uint    unsigned int    /* assuming >= 16 bits */
14
15/* Forward declaration */
16static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
17static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
18static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
19static void _PyMem_DebugRawFree(void *ctx, void *p);
20
21static void* _PyMem_DebugMalloc(void *ctx, size_t size);
22static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
23static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
24static void _PyMem_DebugFree(void *ctx, void *p);
25
26static void _PyObject_DebugDumpAddress(const void *p);
27static void _PyMem_DebugCheckAddress(char api_id, const void *p);
28
29#if defined(__has_feature)  /* Clang */
30 #if __has_feature(address_sanitizer)  /* is ASAN enabled? */
31  #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
32        __attribute__((no_address_safety_analysis))
33 #else
34  #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
35 #endif
36#else
37 #if defined(__SANITIZE_ADDRESS__)  /* GCC 4.8.x, is ASAN enabled? */
38  #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
39        __attribute__((no_address_safety_analysis))
40 #else
41  #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
42 #endif
43#endif
44
45#ifdef WITH_PYMALLOC
46
47#ifdef MS_WINDOWS
48#  include <windows.h>
49#elif defined(HAVE_MMAP)
50#  include <sys/mman.h>
51#  ifdef MAP_ANONYMOUS
52#    define ARENAS_USE_MMAP
53#  endif
54#endif
55
56/* Forward declaration */
57static void* _PyObject_Malloc(void *ctx, size_t size);
58static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
59static void _PyObject_Free(void *ctx, void *p);
60static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
61#endif
62
63
64static void *
65_PyMem_RawMalloc(void *ctx, size_t size)
66{
67    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
68       for malloc(0), which would be treated as an error. Some platforms would
69       return a pointer with no memory behind it, which would break pymalloc.
70       To solve these problems, allocate an extra byte. */
71    if (size == 0)
72        size = 1;
73    return malloc(size);
74}
75
76static void *
77_PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
78{
79    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
80       for calloc(0, 0), which would be treated as an error. Some platforms
81       would return a pointer with no memory behind it, which would break
82       pymalloc.  To solve these problems, allocate an extra byte. */
83    if (nelem == 0 || elsize == 0) {
84        nelem = 1;
85        elsize = 1;
86    }
87    return calloc(nelem, elsize);
88}
89
90static void *
91_PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
92{
93    if (size == 0)
94        size = 1;
95    return realloc(ptr, size);
96}
97
98static void
99_PyMem_RawFree(void *ctx, void *ptr)
100{
101    free(ptr);
102}
103
104
105#ifdef MS_WINDOWS
106static void *
107_PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
108{
109    return VirtualAlloc(NULL, size,
110                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
111}
112
113static void
114_PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
115{
116    VirtualFree(ptr, 0, MEM_RELEASE);
117}
118
119#elif defined(ARENAS_USE_MMAP)
120static void *
121_PyObject_ArenaMmap(void *ctx, size_t size)
122{
123    void *ptr;
124    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
125               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
126    if (ptr == MAP_FAILED)
127        return NULL;
128    assert(ptr != NULL);
129    return ptr;
130}
131
132static void
133_PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
134{
135    munmap(ptr, size);
136}
137
138#else
139static void *
140_PyObject_ArenaMalloc(void *ctx, size_t size)
141{
142    return malloc(size);
143}
144
145static void
146_PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
147{
148    free(ptr);
149}
150#endif
151
152
153#define PYRAW_FUNCS _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree
154#ifdef WITH_PYMALLOC
155#  define PYOBJ_FUNCS _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free
156#else
157#  define PYOBJ_FUNCS PYRAW_FUNCS
158#endif
159#define PYMEM_FUNCS PYOBJ_FUNCS
160
161typedef struct {
162    /* We tag each block with an API ID in order to tag API violations */
163    char api_id;
164    PyMemAllocatorEx alloc;
165} debug_alloc_api_t;
166static struct {
167    debug_alloc_api_t raw;
168    debug_alloc_api_t mem;
169    debug_alloc_api_t obj;
170} _PyMem_Debug = {
171    {'r', {NULL, PYRAW_FUNCS}},
172    {'m', {NULL, PYMEM_FUNCS}},
173    {'o', {NULL, PYOBJ_FUNCS}}
174    };
175
176#define PYRAWDBG_FUNCS \
177    _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree
178#define PYDBG_FUNCS \
179    _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree
180
181static PyMemAllocatorEx _PyMem_Raw = {
182#ifdef Py_DEBUG
183    &_PyMem_Debug.raw, PYRAWDBG_FUNCS
184#else
185    NULL, PYRAW_FUNCS
186#endif
187    };
188
189static PyMemAllocatorEx _PyMem = {
190#ifdef Py_DEBUG
191    &_PyMem_Debug.mem, PYDBG_FUNCS
192#else
193    NULL, PYMEM_FUNCS
194#endif
195    };
196
197static PyMemAllocatorEx _PyObject = {
198#ifdef Py_DEBUG
199    &_PyMem_Debug.obj, PYDBG_FUNCS
200#else
201    NULL, PYOBJ_FUNCS
202#endif
203    };
204
205int
206_PyMem_SetupAllocators(const char *opt)
207{
208    if (opt == NULL || *opt == '\0') {
209        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
210           options): use default allocators */
211#ifdef Py_DEBUG
212#  ifdef WITH_PYMALLOC
213        opt = "pymalloc_debug";
214#  else
215        opt = "malloc_debug";
216#  endif
217#else
218   /* !Py_DEBUG */
219#  ifdef WITH_PYMALLOC
220        opt = "pymalloc";
221#  else
222        opt = "malloc";
223#  endif
224#endif
225    }
226
227    if (strcmp(opt, "debug") == 0) {
228        PyMem_SetupDebugHooks();
229    }
230    else if (strcmp(opt, "malloc") == 0 || strcmp(opt, "malloc_debug") == 0)
231    {
232        PyMemAllocatorEx alloc = {NULL, PYRAW_FUNCS};
233
234        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
235        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
236        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
237
238        if (strcmp(opt, "malloc_debug") == 0)
239            PyMem_SetupDebugHooks();
240    }
241#ifdef WITH_PYMALLOC
242    else if (strcmp(opt, "pymalloc") == 0
243             || strcmp(opt, "pymalloc_debug") == 0)
244    {
245        PyMemAllocatorEx raw_alloc = {NULL, PYRAW_FUNCS};
246        PyMemAllocatorEx mem_alloc = {NULL, PYMEM_FUNCS};
247        PyMemAllocatorEx obj_alloc = {NULL, PYOBJ_FUNCS};
248
249        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &raw_alloc);
250        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &mem_alloc);
251        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &obj_alloc);
252
253        if (strcmp(opt, "pymalloc_debug") == 0)
254            PyMem_SetupDebugHooks();
255    }
256#endif
257    else {
258        /* unknown allocator */
259        return -1;
260    }
261    return 0;
262}
263
264#undef PYRAW_FUNCS
265#undef PYMEM_FUNCS
266#undef PYOBJ_FUNCS
267#undef PYRAWDBG_FUNCS
268#undef PYDBG_FUNCS
269
270static PyObjectArenaAllocator _PyObject_Arena = {NULL,
271#ifdef MS_WINDOWS
272    _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
273#elif defined(ARENAS_USE_MMAP)
274    _PyObject_ArenaMmap, _PyObject_ArenaMunmap
275#else
276    _PyObject_ArenaMalloc, _PyObject_ArenaFree
277#endif
278    };
279
280#ifdef WITH_PYMALLOC
281static int
282_PyMem_DebugEnabled(void)
283{
284    return (_PyObject.malloc == _PyMem_DebugMalloc);
285}
286
287int
288_PyMem_PymallocEnabled(void)
289{
290    if (_PyMem_DebugEnabled()) {
291        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
292    }
293    else {
294        return (_PyObject.malloc == _PyObject_Malloc);
295    }
296}
297#endif
298
299void
300PyMem_SetupDebugHooks(void)
301{
302    PyMemAllocatorEx alloc;
303
304    alloc.malloc = _PyMem_DebugRawMalloc;
305    alloc.calloc = _PyMem_DebugRawCalloc;
306    alloc.realloc = _PyMem_DebugRawRealloc;
307    alloc.free = _PyMem_DebugRawFree;
308
309    if (_PyMem_Raw.malloc != _PyMem_DebugRawMalloc) {
310        alloc.ctx = &_PyMem_Debug.raw;
311        PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
312        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
313    }
314
315    alloc.malloc = _PyMem_DebugMalloc;
316    alloc.calloc = _PyMem_DebugCalloc;
317    alloc.realloc = _PyMem_DebugRealloc;
318    alloc.free = _PyMem_DebugFree;
319
320    if (_PyMem.malloc != _PyMem_DebugMalloc) {
321        alloc.ctx = &_PyMem_Debug.mem;
322        PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
323        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
324    }
325
326    if (_PyObject.malloc != _PyMem_DebugMalloc) {
327        alloc.ctx = &_PyMem_Debug.obj;
328        PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
329        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
330    }
331}
332
333void
334PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
335{
336    switch(domain)
337    {
338    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
339    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
340    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
341    default:
342        /* unknown domain: set all attributes to NULL */
343        allocator->ctx = NULL;
344        allocator->malloc = NULL;
345        allocator->calloc = NULL;
346        allocator->realloc = NULL;
347        allocator->free = NULL;
348    }
349}
350
351void
352PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
353{
354    switch(domain)
355    {
356    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
357    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
358    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
359    /* ignore unknown domain */
360    }
361}
362
363void
364PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
365{
366    *allocator = _PyObject_Arena;
367}
368
369void
370PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
371{
372    _PyObject_Arena = *allocator;
373}
374
375void *
376PyMem_RawMalloc(size_t size)
377{
378    /*
379     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
380     * Most python internals blindly use a signed Py_ssize_t to track
381     * things without checking for overflows or negatives.
382     * As size_t is unsigned, checking for size < 0 is not required.
383     */
384    if (size > (size_t)PY_SSIZE_T_MAX)
385        return NULL;
386    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
387}
388
389void *
390PyMem_RawCalloc(size_t nelem, size_t elsize)
391{
392    /* see PyMem_RawMalloc() */
393    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
394        return NULL;
395    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
396}
397
398void*
399PyMem_RawRealloc(void *ptr, size_t new_size)
400{
401    /* see PyMem_RawMalloc() */
402    if (new_size > (size_t)PY_SSIZE_T_MAX)
403        return NULL;
404    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
405}
406
407void PyMem_RawFree(void *ptr)
408{
409    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
410}
411
412void *
413PyMem_Malloc(size_t size)
414{
415    /* see PyMem_RawMalloc() */
416    if (size > (size_t)PY_SSIZE_T_MAX)
417        return NULL;
418    return _PyMem.malloc(_PyMem.ctx, size);
419}
420
421void *
422PyMem_Calloc(size_t nelem, size_t elsize)
423{
424    /* see PyMem_RawMalloc() */
425    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
426        return NULL;
427    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
428}
429
430void *
431PyMem_Realloc(void *ptr, size_t new_size)
432{
433    /* see PyMem_RawMalloc() */
434    if (new_size > (size_t)PY_SSIZE_T_MAX)
435        return NULL;
436    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
437}
438
439void
440PyMem_Free(void *ptr)
441{
442    _PyMem.free(_PyMem.ctx, ptr);
443}
444
445char *
446_PyMem_RawStrdup(const char *str)
447{
448    size_t size;
449    char *copy;
450
451    size = strlen(str) + 1;
452    copy = PyMem_RawMalloc(size);
453    if (copy == NULL)
454        return NULL;
455    memcpy(copy, str, size);
456    return copy;
457}
458
459char *
460_PyMem_Strdup(const char *str)
461{
462    size_t size;
463    char *copy;
464
465    size = strlen(str) + 1;
466    copy = PyMem_Malloc(size);
467    if (copy == NULL)
468        return NULL;
469    memcpy(copy, str, size);
470    return copy;
471}
472
473void *
474PyObject_Malloc(size_t size)
475{
476    /* see PyMem_RawMalloc() */
477    if (size > (size_t)PY_SSIZE_T_MAX)
478        return NULL;
479    return _PyObject.malloc(_PyObject.ctx, size);
480}
481
482void *
483PyObject_Calloc(size_t nelem, size_t elsize)
484{
485    /* see PyMem_RawMalloc() */
486    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
487        return NULL;
488    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
489}
490
491void *
492PyObject_Realloc(void *ptr, size_t new_size)
493{
494    /* see PyMem_RawMalloc() */
495    if (new_size > (size_t)PY_SSIZE_T_MAX)
496        return NULL;
497    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
498}
499
500void
501PyObject_Free(void *ptr)
502{
503    _PyObject.free(_PyObject.ctx, ptr);
504}
505
506
507#ifdef WITH_PYMALLOC
508
509#ifdef WITH_VALGRIND
510#include <valgrind/valgrind.h>
511
512/* If we're using GCC, use __builtin_expect() to reduce overhead of
513   the valgrind checks */
514#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
515#  define UNLIKELY(value) __builtin_expect((value), 0)
516#else
517#  define UNLIKELY(value) (value)
518#endif
519
520/* -1 indicates that we haven't checked that we're running on valgrind yet. */
521static int running_on_valgrind = -1;
522#endif
523
524/* An object allocator for Python.
525
526   Here is an introduction to the layers of the Python memory architecture,
527   showing where the object allocator is actually used (layer +2), It is
528   called for every object allocation and deallocation (PyObject_New/Del),
529   unless the object-specific allocators implement a proprietary allocation
530   scheme (ex.: ints use a simple free list). This is also the place where
531   the cyclic garbage collector operates selectively on container objects.
532
533
534    Object-specific allocators
535    _____   ______   ______       ________
536   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
537+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
538    _______________________________       |                           |
539   [   Python's object allocator   ]      |                           |
540+2 | ####### Object memory ####### | <------ Internal buffers ------> |
541    ______________________________________________________________    |
542   [          Python's raw memory allocator (PyMem_ API)          ]   |
543+1 | <----- Python memory (under PyMem manager's control) ------> |   |
544    __________________________________________________________________
545   [    Underlying general-purpose allocator (ex: C library malloc)   ]
546 0 | <------ Virtual memory allocated for the python process -------> |
547
548   =========================================================================
549    _______________________________________________________________________
550   [                OS-specific Virtual Memory Manager (VMM)               ]
551-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
552    __________________________________   __________________________________
553   [                                  ] [                                  ]
554-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
555
556*/
557/*==========================================================================*/
558
559/* A fast, special-purpose memory allocator for small blocks, to be used
560   on top of a general-purpose malloc -- heavily based on previous art. */
561
562/* Vladimir Marangozov -- August 2000 */
563
564/*
565 * "Memory management is where the rubber meets the road -- if we do the wrong
566 * thing at any level, the results will not be good. And if we don't make the
567 * levels work well together, we are in serious trouble." (1)
568 *
569 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
570 *    "Dynamic Storage Allocation: A Survey and Critical Review",
571 *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
572 */
573
574/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
575
576/*==========================================================================*/
577
578/*
579 * Allocation strategy abstract:
580 *
581 * For small requests, the allocator sub-allocates <Big> blocks of memory.
582 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
583 * system's allocator.
584 *
585 * Small requests are grouped in size classes spaced 8 bytes apart, due
586 * to the required valid alignment of the returned address. Requests of
587 * a particular size are serviced from memory pools of 4K (one VMM page).
588 * Pools are fragmented on demand and contain free lists of blocks of one
589 * particular size class. In other words, there is a fixed-size allocator
590 * for each size class. Free pools are shared by the different allocators
591 * thus minimizing the space reserved for a particular size class.
592 *
593 * This allocation strategy is a variant of what is known as "simple
594 * segregated storage based on array of free lists". The main drawback of
595 * simple segregated storage is that we might end up with lot of reserved
596 * memory for the different free lists, which degenerate in time. To avoid
597 * this, we partition each free list in pools and we share dynamically the
598 * reserved space between all free lists. This technique is quite efficient
599 * for memory intensive programs which allocate mainly small-sized blocks.
600 *
601 * For small requests we have the following table:
602 *
603 * Request in bytes     Size of allocated block      Size class idx
604 * ----------------------------------------------------------------
605 *        1-8                     8                       0
606 *        9-16                   16                       1
607 *       17-24                   24                       2
608 *       25-32                   32                       3
609 *       33-40                   40                       4
610 *       41-48                   48                       5
611 *       49-56                   56                       6
612 *       57-64                   64                       7
613 *       65-72                   72                       8
614 *        ...                   ...                     ...
615 *      497-504                 504                      62
616 *      505-512                 512                      63
617 *
618 *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
619 *      allocator.
620 */
621
622/*==========================================================================*/
623
624/*
625 * -- Main tunable settings section --
626 */
627
628/*
629 * Alignment of addresses returned to the user. 8-bytes alignment works
630 * on most current architectures (with 32-bit or 64-bit address busses).
631 * The alignment value is also used for grouping small requests in size
632 * classes spaced ALIGNMENT bytes apart.
633 *
634 * You shouldn't change this unless you know what you are doing.
635 */
636#define ALIGNMENT               8               /* must be 2^N */
637#define ALIGNMENT_SHIFT         3
638
639/* Return the number of bytes in size class I, as a uint. */
640#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
641
642/*
643 * Max size threshold below which malloc requests are considered to be
644 * small enough in order to use preallocated memory pools. You can tune
645 * this value according to your application behaviour and memory needs.
646 *
647 * Note: a size threshold of 512 guarantees that newly created dictionaries
648 * will be allocated from preallocated memory pools on 64-bit.
649 *
650 * The following invariants must hold:
651 *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
652 *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
653 *
654 * Although not required, for better performance and space efficiency,
655 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
656 */
657#define SMALL_REQUEST_THRESHOLD 512
658#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
659
660/*
661 * The system's VMM page size can be obtained on most unices with a
662 * getpagesize() call or deduced from various header files. To make
663 * things simpler, we assume that it is 4K, which is OK for most systems.
664 * It is probably better if this is the native page size, but it doesn't
665 * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
666 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
667 * violation fault.  4K is apparently OK for all the platforms that python
668 * currently targets.
669 */
670#define SYSTEM_PAGE_SIZE        (4 * 1024)
671#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
672
673/*
674 * Maximum amount of memory managed by the allocator for small requests.
675 */
676#ifdef WITH_MEMORY_LIMITS
677#ifndef SMALL_MEMORY_LIMIT
678#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
679#endif
680#endif
681
682/*
683 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
684 * on a page boundary. This is a reserved virtual address space for the
685 * current process (obtained through a malloc()/mmap() call). In no way this
686 * means that the memory arenas will be used entirely. A malloc(<Big>) is
687 * usually an address range reservation for <Big> bytes, unless all pages within
688 * this space are referenced subsequently. So malloc'ing big blocks and not
689 * using them does not mean "wasting memory". It's an addressable range
690 * wastage...
691 *
692 * Arenas are allocated with mmap() on systems supporting anonymous memory
693 * mappings to reduce heap fragmentation.
694 */
695#define ARENA_SIZE              (256 << 10)     /* 256KB */
696
697#ifdef WITH_MEMORY_LIMITS
698#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
699#endif
700
701/*
702 * Size of the pools used for small blocks. Should be a power of 2,
703 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
704 */
705#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
706#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
707
708/*
709 * -- End of tunable settings section --
710 */
711
712/*==========================================================================*/
713
714/*
715 * Locking
716 *
717 * To reduce lock contention, it would probably be better to refine the
718 * crude function locking with per size class locking. I'm not positive
719 * however, whether it's worth switching to such locking policy because
720 * of the performance penalty it might introduce.
721 *
722 * The following macros describe the simplest (should also be the fastest)
723 * lock object on a particular platform and the init/fini/lock/unlock
724 * operations on it. The locks defined here are not expected to be recursive
725 * because it is assumed that they will always be called in the order:
726 * INIT, [LOCK, UNLOCK]*, FINI.
727 */
728
729/*
730 * Python's threads are serialized, so object malloc locking is disabled.
731 */
732#define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
733#define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
734#define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
735#define SIMPLELOCK_LOCK(lock)   /* acquire released lock */
736#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */
737
738/* When you say memory, my mind reasons in terms of (pointers to) blocks */
739typedef uint8_t block;
740
741/* Pool for small blocks. */
742struct pool_header {
743    union { block *_padding;
744            uint count; } ref;          /* number of allocated blocks    */
745    block *freeblock;                   /* pool's free list head         */
746    struct pool_header *nextpool;       /* next pool of this size class  */
747    struct pool_header *prevpool;       /* previous pool       ""        */
748    uint arenaindex;                    /* index into arenas of base adr */
749    uint szidx;                         /* block size class index        */
750    uint nextoffset;                    /* bytes to virgin block         */
751    uint maxnextoffset;                 /* largest valid nextoffset      */
752};
753
754typedef struct pool_header *poolp;
755
756/* Record keeping for arenas. */
757struct arena_object {
758    /* The address of the arena, as returned by malloc.  Note that 0
759     * will never be returned by a successful malloc, and is used
760     * here to mark an arena_object that doesn't correspond to an
761     * allocated arena.
762     */
763    uintptr_t address;
764
765    /* Pool-aligned pointer to the next pool to be carved off. */
766    block* pool_address;
767
768    /* The number of available pools in the arena:  free pools + never-
769     * allocated pools.
770     */
771    uint nfreepools;
772
773    /* The total number of pools in the arena, whether or not available. */
774    uint ntotalpools;
775
776    /* Singly-linked list of available pools. */
777    struct pool_header* freepools;
778
779    /* Whenever this arena_object is not associated with an allocated
780     * arena, the nextarena member is used to link all unassociated
781     * arena_objects in the singly-linked `unused_arena_objects` list.
782     * The prevarena member is unused in this case.
783     *
784     * When this arena_object is associated with an allocated arena
785     * with at least one available pool, both members are used in the
786     * doubly-linked `usable_arenas` list, which is maintained in
787     * increasing order of `nfreepools` values.
788     *
789     * Else this arena_object is associated with an allocated arena
790     * all of whose pools are in use.  `nextarena` and `prevarena`
791     * are both meaningless in this case.
792     */
793    struct arena_object* nextarena;
794    struct arena_object* prevarena;
795};
796
797#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
798
799#define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
800
801/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
802#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
803
804/* Return total number of blocks in pool of size index I, as a uint. */
805#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
806
807/*==========================================================================*/
808
809/*
810 * This malloc lock
811 */
812SIMPLELOCK_DECL(_malloc_lock)
813#define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
814#define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
815#define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
816#define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)
817
818/*
819 * Pool table -- headed, circular, doubly-linked lists of partially used pools.
820
821This is involved.  For an index i, usedpools[i+i] is the header for a list of
822all partially used pools holding small blocks with "size class idx" i. So
823usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
82416, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
825
826Pools are carved off an arena's highwater mark (an arena_object's pool_address
827member) as needed.  Once carved off, a pool is in one of three states forever
828after:
829
830used == partially used, neither empty nor full
831    At least one block in the pool is currently allocated, and at least one
832    block in the pool is not currently allocated (note this implies a pool
833    has room for at least two blocks).
834    This is a pool's initial state, as a pool is created only when malloc
835    needs space.
836    The pool holds blocks of a fixed size, and is in the circular list headed
837    at usedpools[i] (see above).  It's linked to the other used pools of the
838    same size class via the pool_header's nextpool and prevpool members.
839    If all but one block is currently allocated, a malloc can cause a
840    transition to the full state.  If all but one block is not currently
841    allocated, a free can cause a transition to the empty state.
842
843full == all the pool's blocks are currently allocated
844    On transition to full, a pool is unlinked from its usedpools[] list.
845    It's not linked to from anything then anymore, and its nextpool and
846    prevpool members are meaningless until it transitions back to used.
847    A free of a block in a full pool puts the pool back in the used state.
848    Then it's linked in at the front of the appropriate usedpools[] list, so
849    that the next allocation for its size class will reuse the freed block.
850
851empty == all the pool's blocks are currently available for allocation
852    On transition to empty, a pool is unlinked from its usedpools[] list,
853    and linked to the front of its arena_object's singly-linked freepools list,
854    via its nextpool member.  The prevpool member has no meaning in this case.
855    Empty pools have no inherent size class:  the next time a malloc finds
856    an empty list in usedpools[], it takes the first pool off of freepools.
857    If the size class needed happens to be the same as the size class the pool
858    last had, some pool initialization can be skipped.
859
860
861Block Management
862
863Blocks within pools are again carved out as needed.  pool->freeblock points to
864the start of a singly-linked list of free blocks within the pool.  When a
865block is freed, it's inserted at the front of its pool's freeblock list.  Note
866that the available blocks in a pool are *not* linked all together when a pool
867is initialized.  Instead only "the first two" (lowest addresses) blocks are
868set up, returning the first such block, and setting pool->freeblock to a
869one-block list holding the second such block.  This is consistent with that
870pymalloc strives at all levels (arena, pool, and block) never to touch a piece
871of memory until it's actually needed.
872
873So long as a pool is in the used state, we're certain there *is* a block
874available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
875points to the end of the free list before we've carved the entire pool into
876blocks, that means we simply haven't yet gotten to one of the higher-address
877blocks.  The offset from the pool_header to the start of "the next" virgin
878block is stored in the pool_header nextoffset member, and the largest value
879of nextoffset that makes sense is stored in the maxnextoffset member when a
880pool is initialized.  All the blocks in a pool have been passed out at least
881once when and only when nextoffset > maxnextoffset.
882
883
884Major obscurity:  While the usedpools vector is declared to have poolp
885entries, it doesn't really.  It really contains two pointers per (conceptual)
886poolp entry, the nextpool and prevpool members of a pool_header.  The
887excruciating initialization code below fools C so that
888
889    usedpool[i+i]
890
891"acts like" a genuine poolp, but only so long as you only reference its
892nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
893compensating for that a pool_header's nextpool and prevpool members
894immediately follow a pool_header's first two members:
895
896    union { block *_padding;
897            uint count; } ref;
898    block *freeblock;
899
900each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
901contains is a fudged-up pointer p such that *if* C believes it's a poolp
902pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
903circular list is empty).
904
905It's unclear why the usedpools setup is so convoluted.  It could be to
906minimize the amount of cache required to hold this heavily-referenced table
907(which only *needs* the two interpool pointer members of a pool_header). OTOH,
908referencing code has to remember to "double the index" and doing so isn't
909free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
910on that C doesn't insert any padding anywhere in a pool_header at or before
911the prevpool member.
912**************************************************************************** */
913
914#define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
915#define PT(x)   PTA(x), PTA(x)
916
917static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
918    PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
919#if NB_SMALL_SIZE_CLASSES > 8
920    , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
921#if NB_SMALL_SIZE_CLASSES > 16
922    , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
923#if NB_SMALL_SIZE_CLASSES > 24
924    , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
925#if NB_SMALL_SIZE_CLASSES > 32
926    , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
927#if NB_SMALL_SIZE_CLASSES > 40
928    , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
929#if NB_SMALL_SIZE_CLASSES > 48
930    , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
931#if NB_SMALL_SIZE_CLASSES > 56
932    , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
933#if NB_SMALL_SIZE_CLASSES > 64
934#error "NB_SMALL_SIZE_CLASSES should be less than 64"
935#endif /* NB_SMALL_SIZE_CLASSES > 64 */
936#endif /* NB_SMALL_SIZE_CLASSES > 56 */
937#endif /* NB_SMALL_SIZE_CLASSES > 48 */
938#endif /* NB_SMALL_SIZE_CLASSES > 40 */
939#endif /* NB_SMALL_SIZE_CLASSES > 32 */
940#endif /* NB_SMALL_SIZE_CLASSES > 24 */
941#endif /* NB_SMALL_SIZE_CLASSES > 16 */
942#endif /* NB_SMALL_SIZE_CLASSES >  8 */
943};
944
945/*==========================================================================
946Arena management.
947
948`arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
949which may not be currently used (== they're arena_objects that aren't
950currently associated with an allocated arena).  Note that arenas proper are
951separately malloc'ed.
952
953Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
954we do try to free() arenas, and use some mild heuristic strategies to increase
955the likelihood that arenas eventually can be freed.
956
957unused_arena_objects
958
959    This is a singly-linked list of the arena_objects that are currently not
960    being used (no arena is associated with them).  Objects are taken off the
961    head of the list in new_arena(), and are pushed on the head of the list in
962    PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
963    is on this list if and only if its .address member is 0.
964
965usable_arenas
966
967    This is a doubly-linked list of the arena_objects associated with arenas
968    that have pools available.  These pools are either waiting to be reused,
969    or have not been used before.  The list is sorted to have the most-
970    allocated arenas first (ascending order based on the nfreepools member).
971    This means that the next allocation will come from a heavily used arena,
972    which gives the nearly empty arenas a chance to be returned to the system.
973    In my unscientific tests this dramatically improved the number of arenas
974    that could be freed.
975
976Note that an arena_object associated with an arena all of whose pools are
977currently in use isn't on either list.
978*/
979
980/* Array of objects used to track chunks of memory (arenas). */
981static struct arena_object* arenas = NULL;
982/* Number of slots currently allocated in the `arenas` vector. */
983static uint maxarenas = 0;
984
985/* The head of the singly-linked, NULL-terminated list of available
986 * arena_objects.
987 */
988static struct arena_object* unused_arena_objects = NULL;
989
990/* The head of the doubly-linked, NULL-terminated at each end, list of
991 * arena_objects associated with arenas that have pools available.
992 */
993static struct arena_object* usable_arenas = NULL;
994
995/* How many arena_objects do we initially allocate?
996 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
997 * `arenas` vector.
998 */
999#define INITIAL_ARENA_OBJECTS 16
1000
1001/* Number of arenas allocated that haven't been free()'d. */
1002static size_t narenas_currently_allocated = 0;
1003
1004/* Total number of times malloc() called to allocate an arena. */
1005static size_t ntimes_arena_allocated = 0;
1006/* High water mark (max value ever seen) for narenas_currently_allocated. */
1007static size_t narenas_highwater = 0;
1008
1009static Py_ssize_t _Py_AllocatedBlocks = 0;
1010
1011Py_ssize_t
1012_Py_GetAllocatedBlocks(void)
1013{
1014    return _Py_AllocatedBlocks;
1015}
1016
1017
1018/* Allocate a new arena.  If we run out of memory, return NULL.  Else
1019 * allocate a new arena, and return the address of an arena_object
1020 * describing the new arena.  It's expected that the caller will set
1021 * `usable_arenas` to the return value.
1022 */
1023static struct arena_object*
1024new_arena(void)
1025{
1026    struct arena_object* arenaobj;
1027    uint excess;        /* number of bytes above pool alignment */
1028    void *address;
1029    static int debug_stats = -1;
1030
1031    if (debug_stats == -1) {
1032        char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1033        debug_stats = (opt != NULL && *opt != '\0');
1034    }
1035    if (debug_stats)
1036        _PyObject_DebugMallocStats(stderr);
1037
1038    if (unused_arena_objects == NULL) {
1039        uint i;
1040        uint numarenas;
1041        size_t nbytes;
1042
1043        /* Double the number of arena objects on each allocation.
1044         * Note that it's possible for `numarenas` to overflow.
1045         */
1046        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1047        if (numarenas <= maxarenas)
1048            return NULL;                /* overflow */
1049#if SIZEOF_SIZE_T <= SIZEOF_INT
1050        if (numarenas > SIZE_MAX / sizeof(*arenas))
1051            return NULL;                /* overflow */
1052#endif
1053        nbytes = numarenas * sizeof(*arenas);
1054        arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
1055        if (arenaobj == NULL)
1056            return NULL;
1057        arenas = arenaobj;
1058
1059        /* We might need to fix pointers that were copied.  However,
1060         * new_arena only gets called when all the pages in the
1061         * previous arenas are full.  Thus, there are *no* pointers
1062         * into the old array. Thus, we don't have to worry about
1063         * invalid pointers.  Just to be sure, some asserts:
1064         */
1065        assert(usable_arenas == NULL);
1066        assert(unused_arena_objects == NULL);
1067
1068        /* Put the new arenas on the unused_arena_objects list. */
1069        for (i = maxarenas; i < numarenas; ++i) {
1070            arenas[i].address = 0;              /* mark as unassociated */
1071            arenas[i].nextarena = i < numarenas - 1 ?
1072                                   &arenas[i+1] : NULL;
1073        }
1074
1075        /* Update globals. */
1076        unused_arena_objects = &arenas[maxarenas];
1077        maxarenas = numarenas;
1078    }
1079
1080    /* Take the next available arena object off the head of the list. */
1081    assert(unused_arena_objects != NULL);
1082    arenaobj = unused_arena_objects;
1083    unused_arena_objects = arenaobj->nextarena;
1084    assert(arenaobj->address == 0);
1085    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1086    if (address == NULL) {
1087        /* The allocation failed: return NULL after putting the
1088         * arenaobj back.
1089         */
1090        arenaobj->nextarena = unused_arena_objects;
1091        unused_arena_objects = arenaobj;
1092        return NULL;
1093    }
1094    arenaobj->address = (uintptr_t)address;
1095
1096    ++narenas_currently_allocated;
1097    ++ntimes_arena_allocated;
1098    if (narenas_currently_allocated > narenas_highwater)
1099        narenas_highwater = narenas_currently_allocated;
1100    arenaobj->freepools = NULL;
1101    /* pool_address <- first pool-aligned address in the arena
1102       nfreepools <- number of whole pools that fit after alignment */
1103    arenaobj->pool_address = (block*)arenaobj->address;
1104    arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
1105    assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
1106    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1107    if (excess != 0) {
1108        --arenaobj->nfreepools;
1109        arenaobj->pool_address += POOL_SIZE - excess;
1110    }
1111    arenaobj->ntotalpools = arenaobj->nfreepools;
1112
1113    return arenaobj;
1114}
1115
1116/*
1117address_in_range(P, POOL)
1118
1119Return true if and only if P is an address that was allocated by pymalloc.
1120POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1121(the caller is asked to compute this because the macro expands POOL more than
1122once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1123variable and pass the latter to the macro; because address_in_range is
1124called on every alloc/realloc/free, micro-efficiency is important here).
1125
1126Tricky:  Let B be the arena base address associated with the pool, B =
1127arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
1128
1129    B <= P < B + ARENA_SIZE
1130
1131Subtracting B throughout, this is true iff
1132
1133    0 <= P-B < ARENA_SIZE
1134
1135By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1136
1137Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
1138before the first arena has been allocated.  `arenas` is still NULL in that
1139case.  We're relying on that maxarenas is also 0 in that case, so that
1140(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
1141into a NULL arenas.
1142
1143Details:  given P and POOL, the arena_object corresponding to P is AO =
1144arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
1145stores, etc), POOL is the correct address of P's pool, AO.address is the
1146correct base address of the pool's arena, and P must be within ARENA_SIZE of
1147AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
1148(NULL)).  Therefore address_in_range correctly reports that obmalloc
1149controls P.
1150
1151Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1152call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
1153in this case -- it may even be uninitialized trash.  If the trash arenaindex
1154is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1155control P.
1156
1157Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
1158allocated arena, obmalloc controls all the memory in slice AO.address :
1159AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
1160so P doesn't lie in that slice, so the macro correctly reports that P is not
1161controlled by obmalloc.
1162
1163Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1164arena_object (one not currently associated with an allocated arena),
1165AO.address is 0, and the second test in the macro reduces to:
1166
1167    P < ARENA_SIZE
1168
1169If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1170that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
1171of the test still passes, and the third clause (AO.address != 0) is necessary
1172to get the correct result:  AO.address is 0 in this case, so the macro
1173correctly reports that P is not controlled by obmalloc (despite that P lies in
1174slice AO.address : AO.address + ARENA_SIZE).
1175
1176Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
11772.5, arenas were never free()'ed, and an arenaindex < maxarena always
1178corresponded to a currently-allocated arena, so the "P is not controlled by
1179obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1180was impossible.
1181
1182Note that the logic is excruciating, and reading up possibly uninitialized
1183memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1184creates problems for some memory debuggers.  The overwhelming advantage is
1185that this test determines whether an arbitrary address is controlled by
1186obmalloc in a small constant time, independent of the number of arenas
1187obmalloc controls.  Since this test is needed at every entry point, it's
1188extremely desirable that it be this fast.
1189*/
1190
1191static bool ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
1192address_in_range(void *p, poolp pool)
1193{
1194    // Since address_in_range may be reading from memory which was not allocated
1195    // by Python, it is important that pool->arenaindex is read only once, as
1196    // another thread may be concurrently modifying the value without holding
1197    // the GIL. The following dance forces the compiler to read pool->arenaindex
1198    // only once.
1199    uint arenaindex = *((volatile uint *)&pool->arenaindex);
1200    return arenaindex < maxarenas &&
1201        (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1202        arenas[arenaindex].address != 0;
1203}
1204
1205/*==========================================================================*/
1206
1207/* malloc.  Note that nbytes==0 tries to return a non-NULL pointer, distinct
1208 * from all other currently live pointers.  This may not be possible.
1209 */
1210
1211/*
1212 * The basic blocks are ordered by decreasing execution frequency,
1213 * which minimizes the number of jumps in the most common cases,
1214 * improves branching prediction and instruction scheduling (small
1215 * block allocations typically result in a couple of instructions).
1216 * Unless the optimizer reorders everything, being too smart...
1217 */
1218
1219static void *
1220_PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
1221{
1222    size_t nbytes;
1223    block *bp;
1224    poolp pool;
1225    poolp next;
1226    uint size;
1227
1228    _Py_AllocatedBlocks++;
1229
1230    assert(nelem <= PY_SSIZE_T_MAX / elsize);
1231    nbytes = nelem * elsize;
1232
1233#ifdef WITH_VALGRIND
1234    if (UNLIKELY(running_on_valgrind == -1))
1235        running_on_valgrind = RUNNING_ON_VALGRIND;
1236    if (UNLIKELY(running_on_valgrind))
1237        goto redirect;
1238#endif
1239
1240    if (nelem == 0 || elsize == 0)
1241        goto redirect;
1242
1243    if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
1244        LOCK();
1245        /*
1246         * Most frequent paths first
1247         */
1248        size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1249        pool = usedpools[size + size];
1250        if (pool != pool->nextpool) {
1251            /*
1252             * There is a used pool for this size class.
1253             * Pick up the head block of its free list.
1254             */
1255            ++pool->ref.count;
1256            bp = pool->freeblock;
1257            assert(bp != NULL);
1258            if ((pool->freeblock = *(block **)bp) != NULL) {
1259                UNLOCK();
1260                if (use_calloc)
1261                    memset(bp, 0, nbytes);
1262                return (void *)bp;
1263            }
1264            /*
1265             * Reached the end of the free list, try to extend it.
1266             */
1267            if (pool->nextoffset <= pool->maxnextoffset) {
1268                /* There is room for another block. */
1269                pool->freeblock = (block*)pool +
1270                                  pool->nextoffset;
1271                pool->nextoffset += INDEX2SIZE(size);
1272                *(block **)(pool->freeblock) = NULL;
1273                UNLOCK();
1274                if (use_calloc)
1275                    memset(bp, 0, nbytes);
1276                return (void *)bp;
1277            }
1278            /* Pool is full, unlink from used pools. */
1279            next = pool->nextpool;
1280            pool = pool->prevpool;
1281            next->prevpool = pool;
1282            pool->nextpool = next;
1283            UNLOCK();
1284            if (use_calloc)
1285                memset(bp, 0, nbytes);
1286            return (void *)bp;
1287        }
1288
1289        /* There isn't a pool of the right size class immediately
1290         * available:  use a free pool.
1291         */
1292        if (usable_arenas == NULL) {
1293            /* No arena has a free pool:  allocate a new arena. */
1294#ifdef WITH_MEMORY_LIMITS
1295            if (narenas_currently_allocated >= MAX_ARENAS) {
1296                UNLOCK();
1297                goto redirect;
1298            }
1299#endif
1300            usable_arenas = new_arena();
1301            if (usable_arenas == NULL) {
1302                UNLOCK();
1303                goto redirect;
1304            }
1305            usable_arenas->nextarena =
1306                usable_arenas->prevarena = NULL;
1307        }
1308        assert(usable_arenas->address != 0);
1309
1310        /* Try to get a cached free pool. */
1311        pool = usable_arenas->freepools;
1312        if (pool != NULL) {
1313            /* Unlink from cached pools. */
1314            usable_arenas->freepools = pool->nextpool;
1315
1316            /* This arena already had the smallest nfreepools
1317             * value, so decreasing nfreepools doesn't change
1318             * that, and we don't need to rearrange the
1319             * usable_arenas list.  However, if the arena has
1320             * become wholly allocated, we need to remove its
1321             * arena_object from usable_arenas.
1322             */
1323            --usable_arenas->nfreepools;
1324            if (usable_arenas->nfreepools == 0) {
1325                /* Wholly allocated:  remove. */
1326                assert(usable_arenas->freepools == NULL);
1327                assert(usable_arenas->nextarena == NULL ||
1328                       usable_arenas->nextarena->prevarena ==
1329                       usable_arenas);
1330
1331                usable_arenas = usable_arenas->nextarena;
1332                if (usable_arenas != NULL) {
1333                    usable_arenas->prevarena = NULL;
1334                    assert(usable_arenas->address != 0);
1335                }
1336            }
1337            else {
1338                /* nfreepools > 0:  it must be that freepools
1339                 * isn't NULL, or that we haven't yet carved
1340                 * off all the arena's pools for the first
1341                 * time.
1342                 */
1343                assert(usable_arenas->freepools != NULL ||
1344                       usable_arenas->pool_address <=
1345                       (block*)usable_arenas->address +
1346                           ARENA_SIZE - POOL_SIZE);
1347            }
1348        init_pool:
1349            /* Frontlink to used pools. */
1350            next = usedpools[size + size]; /* == prev */
1351            pool->nextpool = next;
1352            pool->prevpool = next;
1353            next->nextpool = pool;
1354            next->prevpool = pool;
1355            pool->ref.count = 1;
1356            if (pool->szidx == size) {
1357                /* Luckily, this pool last contained blocks
1358                 * of the same size class, so its header
1359                 * and free list are already initialized.
1360                 */
1361                bp = pool->freeblock;
1362                assert(bp != NULL);
1363                pool->freeblock = *(block **)bp;
1364                UNLOCK();
1365                if (use_calloc)
1366                    memset(bp, 0, nbytes);
1367                return (void *)bp;
1368            }
1369            /*
1370             * Initialize the pool header, set up the free list to
1371             * contain just the second block, and return the first
1372             * block.
1373             */
1374            pool->szidx = size;
1375            size = INDEX2SIZE(size);
1376            bp = (block *)pool + POOL_OVERHEAD;
1377            pool->nextoffset = POOL_OVERHEAD + (size << 1);
1378            pool->maxnextoffset = POOL_SIZE - size;
1379            pool->freeblock = bp + size;
1380            *(block **)(pool->freeblock) = NULL;
1381            UNLOCK();
1382            if (use_calloc)
1383                memset(bp, 0, nbytes);
1384            return (void *)bp;
1385        }
1386
1387        /* Carve off a new pool. */
1388        assert(usable_arenas->nfreepools > 0);
1389        assert(usable_arenas->freepools == NULL);
1390        pool = (poolp)usable_arenas->pool_address;
1391        assert((block*)pool <= (block*)usable_arenas->address +
1392                               ARENA_SIZE - POOL_SIZE);
1393        pool->arenaindex = (uint)(usable_arenas - arenas);
1394        assert(&arenas[pool->arenaindex] == usable_arenas);
1395        pool->szidx = DUMMY_SIZE_IDX;
1396        usable_arenas->pool_address += POOL_SIZE;
1397        --usable_arenas->nfreepools;
1398
1399        if (usable_arenas->nfreepools == 0) {
1400            assert(usable_arenas->nextarena == NULL ||
1401                   usable_arenas->nextarena->prevarena ==
1402                   usable_arenas);
1403            /* Unlink the arena:  it is completely allocated. */
1404            usable_arenas = usable_arenas->nextarena;
1405            if (usable_arenas != NULL) {
1406                usable_arenas->prevarena = NULL;
1407                assert(usable_arenas->address != 0);
1408            }
1409        }
1410
1411        goto init_pool;
1412    }
1413
1414    /* The small block allocator ends here. */
1415
1416redirect:
1417    /* Redirect the original request to the underlying (libc) allocator.
1418     * We jump here on bigger requests, on error in the code above (as a
1419     * last chance to serve the request) or when the max memory limit
1420     * has been reached.
1421     */
1422    {
1423        void *result;
1424        if (use_calloc)
1425            result = PyMem_RawCalloc(nelem, elsize);
1426        else
1427            result = PyMem_RawMalloc(nbytes);
1428        if (!result)
1429            _Py_AllocatedBlocks--;
1430        return result;
1431    }
1432}
1433
1434static void *
1435_PyObject_Malloc(void *ctx, size_t nbytes)
1436{
1437    return _PyObject_Alloc(0, ctx, 1, nbytes);
1438}
1439
1440static void *
1441_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
1442{
1443    return _PyObject_Alloc(1, ctx, nelem, elsize);
1444}
1445
1446/* free */
1447
1448static void
1449_PyObject_Free(void *ctx, void *p)
1450{
1451    poolp pool;
1452    block *lastfree;
1453    poolp next, prev;
1454    uint size;
1455
1456    if (p == NULL)      /* free(NULL) has no effect */
1457        return;
1458
1459    _Py_AllocatedBlocks--;
1460
1461#ifdef WITH_VALGRIND
1462    if (UNLIKELY(running_on_valgrind > 0))
1463        goto redirect;
1464#endif
1465
1466    pool = POOL_ADDR(p);
1467    if (address_in_range(p, pool)) {
1468        /* We allocated this address. */
1469        LOCK();
1470        /* Link p to the start of the pool's freeblock list.  Since
1471         * the pool had at least the p block outstanding, the pool
1472         * wasn't empty (so it's already in a usedpools[] list, or
1473         * was full and is in no list -- it's not in the freeblocks
1474         * list in any case).
1475         */
1476        assert(pool->ref.count > 0);            /* else it was empty */
1477        *(block **)p = lastfree = pool->freeblock;
1478        pool->freeblock = (block *)p;
1479        if (lastfree) {
1480            struct arena_object* ao;
1481            uint nf;  /* ao->nfreepools */
1482
1483            /* freeblock wasn't NULL, so the pool wasn't full,
1484             * and the pool is in a usedpools[] list.
1485             */
1486            if (--pool->ref.count != 0) {
1487                /* pool isn't empty:  leave it in usedpools */
1488                UNLOCK();
1489                return;
1490            }
1491            /* Pool is now empty:  unlink from usedpools, and
1492             * link to the front of freepools.  This ensures that
1493             * previously freed pools will be allocated later
1494             * (being not referenced, they are perhaps paged out).
1495             */
1496            next = pool->nextpool;
1497            prev = pool->prevpool;
1498            next->prevpool = prev;
1499            prev->nextpool = next;
1500
1501            /* Link the pool to freepools.  This is a singly-linked
1502             * list, and pool->prevpool isn't used there.
1503             */
1504            ao = &arenas[pool->arenaindex];
1505            pool->nextpool = ao->freepools;
1506            ao->freepools = pool;
1507            nf = ++ao->nfreepools;
1508
1509            /* All the rest is arena management.  We just freed
1510             * a pool, and there are 4 cases for arena mgmt:
1511             * 1. If all the pools are free, return the arena to
1512             *    the system free().
1513             * 2. If this is the only free pool in the arena,
1514             *    add the arena back to the `usable_arenas` list.
1515             * 3. If the "next" arena has a smaller count of free
1516             *    pools, we have to "slide this arena right" to
1517             *    restore that usable_arenas is sorted in order of
1518             *    nfreepools.
1519             * 4. Else there's nothing more to do.
1520             */
1521            if (nf == ao->ntotalpools) {
1522                /* Case 1.  First unlink ao from usable_arenas.
1523                 */
1524                assert(ao->prevarena == NULL ||
1525                       ao->prevarena->address != 0);
1526                assert(ao ->nextarena == NULL ||
1527                       ao->nextarena->address != 0);
1528
1529                /* Fix the pointer in the prevarena, or the
1530                 * usable_arenas pointer.
1531                 */
1532                if (ao->prevarena == NULL) {
1533                    usable_arenas = ao->nextarena;
1534                    assert(usable_arenas == NULL ||
1535                           usable_arenas->address != 0);
1536                }
1537                else {
1538                    assert(ao->prevarena->nextarena == ao);
1539                    ao->prevarena->nextarena =
1540                        ao->nextarena;
1541                }
1542                /* Fix the pointer in the nextarena. */
1543                if (ao->nextarena != NULL) {
1544                    assert(ao->nextarena->prevarena == ao);
1545                    ao->nextarena->prevarena =
1546                        ao->prevarena;
1547                }
1548                /* Record that this arena_object slot is
1549                 * available to be reused.
1550                 */
1551                ao->nextarena = unused_arena_objects;
1552                unused_arena_objects = ao;
1553
1554                /* Free the entire arena. */
1555                _PyObject_Arena.free(_PyObject_Arena.ctx,
1556                                     (void *)ao->address, ARENA_SIZE);
1557                ao->address = 0;                        /* mark unassociated */
1558                --narenas_currently_allocated;
1559
1560                UNLOCK();
1561                return;
1562            }
1563            if (nf == 1) {
1564                /* Case 2.  Put ao at the head of
1565                 * usable_arenas.  Note that because
1566                 * ao->nfreepools was 0 before, ao isn't
1567                 * currently on the usable_arenas list.
1568                 */
1569                ao->nextarena = usable_arenas;
1570                ao->prevarena = NULL;
1571                if (usable_arenas)
1572                    usable_arenas->prevarena = ao;
1573                usable_arenas = ao;
1574                assert(usable_arenas->address != 0);
1575
1576                UNLOCK();
1577                return;
1578            }
1579            /* If this arena is now out of order, we need to keep
1580             * the list sorted.  The list is kept sorted so that
1581             * the "most full" arenas are used first, which allows
1582             * the nearly empty arenas to be completely freed.  In
1583             * a few un-scientific tests, it seems like this
1584             * approach allowed a lot more memory to be freed.
1585             */
1586            if (ao->nextarena == NULL ||
1587                         nf <= ao->nextarena->nfreepools) {
1588                /* Case 4.  Nothing to do. */
1589                UNLOCK();
1590                return;
1591            }
1592            /* Case 3:  We have to move the arena towards the end
1593             * of the list, because it has more free pools than
1594             * the arena to its right.
1595             * First unlink ao from usable_arenas.
1596             */
1597            if (ao->prevarena != NULL) {
1598                /* ao isn't at the head of the list */
1599                assert(ao->prevarena->nextarena == ao);
1600                ao->prevarena->nextarena = ao->nextarena;
1601            }
1602            else {
1603                /* ao is at the head of the list */
1604                assert(usable_arenas == ao);
1605                usable_arenas = ao->nextarena;
1606            }
1607            ao->nextarena->prevarena = ao->prevarena;
1608
1609            /* Locate the new insertion point by iterating over
1610             * the list, using our nextarena pointer.
1611             */
1612            while (ao->nextarena != NULL &&
1613                            nf > ao->nextarena->nfreepools) {
1614                ao->prevarena = ao->nextarena;
1615                ao->nextarena = ao->nextarena->nextarena;
1616            }
1617
1618            /* Insert ao at this point. */
1619            assert(ao->nextarena == NULL ||
1620                ao->prevarena == ao->nextarena->prevarena);
1621            assert(ao->prevarena->nextarena == ao->nextarena);
1622
1623            ao->prevarena->nextarena = ao;
1624            if (ao->nextarena != NULL)
1625                ao->nextarena->prevarena = ao;
1626
1627            /* Verify that the swaps worked. */
1628            assert(ao->nextarena == NULL ||
1629                      nf <= ao->nextarena->nfreepools);
1630            assert(ao->prevarena == NULL ||
1631                      nf > ao->prevarena->nfreepools);
1632            assert(ao->nextarena == NULL ||
1633                ao->nextarena->prevarena == ao);
1634            assert((usable_arenas == ao &&
1635                ao->prevarena == NULL) ||
1636                ao->prevarena->nextarena == ao);
1637
1638            UNLOCK();
1639            return;
1640        }
1641        /* Pool was full, so doesn't currently live in any list:
1642         * link it to the front of the appropriate usedpools[] list.
1643         * This mimics LRU pool usage for new allocations and
1644         * targets optimal filling when several pools contain
1645         * blocks of the same size class.
1646         */
1647        --pool->ref.count;
1648        assert(pool->ref.count > 0);            /* else the pool is empty */
1649        size = pool->szidx;
1650        next = usedpools[size + size];
1651        prev = next->prevpool;
1652        /* insert pool before next:   prev <-> pool <-> next */
1653        pool->nextpool = next;
1654        pool->prevpool = prev;
1655        next->prevpool = pool;
1656        prev->nextpool = pool;
1657        UNLOCK();
1658        return;
1659    }
1660
1661#ifdef WITH_VALGRIND
1662redirect:
1663#endif
1664    /* We didn't allocate this address. */
1665    PyMem_RawFree(p);
1666}
1667
1668/* realloc.  If p is NULL, this acts like malloc(nbytes).  Else if nbytes==0,
1669 * then as the Python docs promise, we do not treat this like free(p), and
1670 * return a non-NULL result.
1671 */
1672
1673static void *
1674_PyObject_Realloc(void *ctx, void *p, size_t nbytes)
1675{
1676    void *bp;
1677    poolp pool;
1678    size_t size;
1679
1680    if (p == NULL)
1681        return _PyObject_Alloc(0, ctx, 1, nbytes);
1682
1683#ifdef WITH_VALGRIND
1684    /* Treat running_on_valgrind == -1 the same as 0 */
1685    if (UNLIKELY(running_on_valgrind > 0))
1686        goto redirect;
1687#endif
1688
1689    pool = POOL_ADDR(p);
1690    if (address_in_range(p, pool)) {
1691        /* We're in charge of this block */
1692        size = INDEX2SIZE(pool->szidx);
1693        if (nbytes <= size) {
1694            /* The block is staying the same or shrinking.  If
1695             * it's shrinking, there's a tradeoff:  it costs
1696             * cycles to copy the block to a smaller size class,
1697             * but it wastes memory not to copy it.  The
1698             * compromise here is to copy on shrink only if at
1699             * least 25% of size can be shaved off.
1700             */
1701            if (4 * nbytes > 3 * size) {
1702                /* It's the same,
1703                 * or shrinking and new/old > 3/4.
1704                 */
1705                return p;
1706            }
1707            size = nbytes;
1708        }
1709        bp = _PyObject_Alloc(0, ctx, 1, nbytes);
1710        if (bp != NULL) {
1711            memcpy(bp, p, size);
1712            _PyObject_Free(ctx, p);
1713        }
1714        return bp;
1715    }
1716#ifdef WITH_VALGRIND
1717 redirect:
1718#endif
1719    /* We're not managing this block.  If nbytes <=
1720     * SMALL_REQUEST_THRESHOLD, it's tempting to try to take over this
1721     * block.  However, if we do, we need to copy the valid data from
1722     * the C-managed block to one of our blocks, and there's no portable
1723     * way to know how much of the memory space starting at p is valid.
1724     * As bug 1185883 pointed out the hard way, it's possible that the
1725     * C-managed block is "at the end" of allocated VM space, so that
1726     * a memory fault can occur if we try to copy nbytes bytes starting
1727     * at p.  Instead we punt:  let C continue to manage this block.
1728     */
1729    if (nbytes)
1730        return PyMem_RawRealloc(p, nbytes);
1731    /* C doesn't define the result of realloc(p, 0) (it may or may not
1732     * return NULL then), but Python's docs promise that nbytes==0 never
1733     * returns NULL.  We don't pass 0 to realloc(), to avoid that endcase
1734     * to begin with.  Even then, we can't be sure that realloc() won't
1735     * return NULL.
1736     */
1737    bp = PyMem_RawRealloc(p, 1);
1738    return bp ? bp : p;
1739}
1740
1741#else   /* ! WITH_PYMALLOC */
1742
1743/*==========================================================================*/
1744/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
1745 * only be used by extensions that are compiled with pymalloc enabled. */
1746
1747Py_ssize_t
1748_Py_GetAllocatedBlocks(void)
1749{
1750    return 0;
1751}
1752
1753#endif /* WITH_PYMALLOC */
1754
1755
1756/*==========================================================================*/
1757/* A x-platform debugging allocator.  This doesn't manage memory directly,
1758 * it wraps a real allocator, adding extra debugging info to the memory blocks.
1759 */
1760
1761/* Special bytes broadcast into debug memory blocks at appropriate times.
1762 * Strings of these are unlikely to be valid addresses, floats, ints or
1763 * 7-bit ASCII.
1764 */
1765#undef CLEANBYTE
1766#undef DEADBYTE
1767#undef FORBIDDENBYTE
1768#define CLEANBYTE      0xCB    /* clean (newly allocated) memory */
1769#define DEADBYTE       0xDB    /* dead (newly freed) memory */
1770#define FORBIDDENBYTE  0xFB    /* untouchable bytes at each end of a block */
1771
1772static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
1773
1774/* serialno is always incremented via calling this routine.  The point is
1775 * to supply a single place to set a breakpoint.
1776 */
1777static void
1778bumpserialno(void)
1779{
1780    ++serialno;
1781}
1782
1783#define SST SIZEOF_SIZE_T
1784
1785/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
1786static size_t
1787read_size_t(const void *p)
1788{
1789    const uint8_t *q = (const uint8_t *)p;
1790    size_t result = *q++;
1791    int i;
1792
1793    for (i = SST; --i > 0; ++q)
1794        result = (result << 8) | *q;
1795    return result;
1796}
1797
1798/* Write n as a big-endian size_t, MSB at address p, LSB at
1799 * p + sizeof(size_t) - 1.
1800 */
1801static void
1802write_size_t(void *p, size_t n)
1803{
1804    uint8_t *q = (uint8_t *)p + SST - 1;
1805    int i;
1806
1807    for (i = SST; --i >= 0; --q) {
1808        *q = (uint8_t)(n & 0xff);
1809        n >>= 8;
1810    }
1811}
1812
1813/* Let S = sizeof(size_t).  The debug malloc asks for 4*S extra bytes and
1814   fills them with useful stuff, here calling the underlying malloc's result p:
1815
1816p[0: S]
1817    Number of bytes originally asked for.  This is a size_t, big-endian (easier
1818    to read in a memory dump).
1819p[S]
1820    API ID.  See PEP 445.  This is a character, but seems undocumented.
1821p[S+1: 2*S]
1822    Copies of FORBIDDENBYTE.  Used to catch under- writes and reads.
1823p[2*S: 2*S+n]
1824    The requested memory, filled with copies of CLEANBYTE.
1825    Used to catch reference to uninitialized memory.
1826    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
1827    handled the request itself.
1828p[2*S+n: 2*S+n+S]
1829    Copies of FORBIDDENBYTE.  Used to catch over- writes and reads.
1830p[2*S+n+S: 2*S+n+2*S]
1831    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
1832    and _PyMem_DebugRealloc.
1833    This is a big-endian size_t.
1834    If "bad memory" is detected later, the serial number gives an
1835    excellent way to set a breakpoint on the next run, to capture the
1836    instant at which this block was passed out.
1837*/
1838
1839static void *
1840_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
1841{
1842    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
1843    uint8_t *p;           /* base address of malloc'ed block */
1844    uint8_t *tail;        /* p + 2*SST + nbytes == pointer to tail pad bytes */
1845    size_t total;       /* nbytes + 4*SST */
1846
1847    bumpserialno();
1848    total = nbytes + 4*SST;
1849    if (nbytes > PY_SSIZE_T_MAX - 4*SST)
1850        /* overflow:  can't represent total as a Py_ssize_t */
1851        return NULL;
1852
1853    if (use_calloc)
1854        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
1855    else
1856        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
1857    if (p == NULL)
1858        return NULL;
1859
1860    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
1861    write_size_t(p, nbytes);
1862    p[SST] = (uint8_t)api->api_id;
1863    memset(p + SST + 1, FORBIDDENBYTE, SST-1);
1864
1865    if (nbytes > 0 && !use_calloc)
1866        memset(p + 2*SST, CLEANBYTE, nbytes);
1867
1868    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
1869    tail = p + 2*SST + nbytes;
1870    memset(tail, FORBIDDENBYTE, SST);
1871    write_size_t(tail + SST, serialno);
1872
1873    return p + 2*SST;
1874}
1875
1876static void *
1877_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
1878{
1879    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
1880}
1881
1882static void *
1883_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
1884{
1885    size_t nbytes;
1886    assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize);
1887    nbytes = nelem * elsize;
1888    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
1889}
1890
1891/* The debug free first checks the 2*SST bytes on each end for sanity (in
1892   particular, that the FORBIDDENBYTEs with the api ID are still intact).
1893   Then fills the original bytes with DEADBYTE.
1894   Then calls the underlying free.
1895*/
1896static void
1897_PyMem_DebugRawFree(void *ctx, void *p)
1898{
1899    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
1900    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
1901    size_t nbytes;
1902
1903    if (p == NULL)
1904        return;
1905    _PyMem_DebugCheckAddress(api->api_id, p);
1906    nbytes = read_size_t(q);
1907    nbytes += 4*SST;
1908    if (nbytes > 0)
1909        memset(q, DEADBYTE, nbytes);
1910    api->alloc.free(api->alloc.ctx, q);
1911}
1912
1913static void *
1914_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
1915{
1916    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
1917    uint8_t *q = (uint8_t *)p, *oldq;
1918    uint8_t *tail;
1919    size_t total;       /* nbytes + 4*SST */
1920    size_t original_nbytes;
1921    int i;
1922
1923    if (p == NULL)
1924        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
1925
1926    _PyMem_DebugCheckAddress(api->api_id, p);
1927    bumpserialno();
1928    original_nbytes = read_size_t(q - 2*SST);
1929    total = nbytes + 4*SST;
1930    if (nbytes > PY_SSIZE_T_MAX - 4*SST)
1931        /* overflow:  can't represent total as a Py_ssize_t */
1932        return NULL;
1933
1934    /* Resize and add decorations. We may get a new pointer here, in which
1935     * case we didn't get the chance to mark the old memory with DEADBYTE,
1936     * but we live with that.
1937     */
1938    oldq = q;
1939    q = (uint8_t *)api->alloc.realloc(api->alloc.ctx, q - 2*SST, total);
1940    if (q == NULL)
1941        return NULL;
1942
1943    if (q == oldq && nbytes < original_nbytes) {
1944        /* shrinking:  mark old extra memory dead */
1945        memset(q + nbytes, DEADBYTE, original_nbytes - nbytes);
1946    }
1947
1948    write_size_t(q, nbytes);
1949    assert(q[SST] == (uint8_t)api->api_id);
1950    for (i = 1; i < SST; ++i)
1951        assert(q[SST + i] == FORBIDDENBYTE);
1952    q += 2*SST;
1953
1954    tail = q + nbytes;
1955    memset(tail, FORBIDDENBYTE, SST);
1956    write_size_t(tail + SST, serialno);
1957
1958    if (nbytes > original_nbytes) {
1959        /* growing:  mark new extra memory clean */
1960        memset(q + original_nbytes, CLEANBYTE,
1961               nbytes - original_nbytes);
1962    }
1963
1964    return q;
1965}
1966
1967static void
1968_PyMem_DebugCheckGIL(void)
1969{
1970#ifdef WITH_THREAD
1971    if (!PyGILState_Check())
1972        Py_FatalError("Python memory allocator called "
1973                      "without holding the GIL");
1974#endif
1975}
1976
1977static void *
1978_PyMem_DebugMalloc(void *ctx, size_t nbytes)
1979{
1980    _PyMem_DebugCheckGIL();
1981    return _PyMem_DebugRawMalloc(ctx, nbytes);
1982}
1983
1984static void *
1985_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
1986{
1987    _PyMem_DebugCheckGIL();
1988    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
1989}
1990
1991static void
1992_PyMem_DebugFree(void *ctx, void *ptr)
1993{
1994    _PyMem_DebugCheckGIL();
1995    _PyMem_DebugRawFree(ctx, ptr);
1996}
1997
1998static void *
1999_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2000{
2001    _PyMem_DebugCheckGIL();
2002    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2003}
2004
2005/* Check the forbidden bytes on both ends of the memory allocated for p.
2006 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2007 * and call Py_FatalError to kill the program.
2008 * The API id, is also checked.
2009 */
2010static void
2011_PyMem_DebugCheckAddress(char api, const void *p)
2012{
2013    const uint8_t *q = (const uint8_t *)p;
2014    char msgbuf[64];
2015    char *msg;
2016    size_t nbytes;
2017    const uint8_t *tail;
2018    int i;
2019    char id;
2020
2021    if (p == NULL) {
2022        msg = "didn't expect a NULL pointer";
2023        goto error;
2024    }
2025
2026    /* Check the API id */
2027    id = (char)q[-SST];
2028    if (id != api) {
2029        msg = msgbuf;
2030        snprintf(msg, sizeof(msgbuf), "bad ID: Allocated using API '%c', verified using API '%c'", id, api);
2031        msgbuf[sizeof(msgbuf)-1] = 0;
2032        goto error;
2033    }
2034
2035    /* Check the stuff at the start of p first:  if there's underwrite
2036     * corruption, the number-of-bytes field may be nuts, and checking
2037     * the tail could lead to a segfault then.
2038     */
2039    for (i = SST-1; i >= 1; --i) {
2040        if (*(q-i) != FORBIDDENBYTE) {
2041            msg = "bad leading pad byte";
2042            goto error;
2043        }
2044    }
2045
2046    nbytes = read_size_t(q - 2*SST);
2047    tail = q + nbytes;
2048    for (i = 0; i < SST; ++i) {
2049        if (tail[i] != FORBIDDENBYTE) {
2050            msg = "bad trailing pad byte";
2051            goto error;
2052        }
2053    }
2054
2055    return;
2056
2057error:
2058    _PyObject_DebugDumpAddress(p);
2059    Py_FatalError(msg);
2060}
2061
2062/* Display info to stderr about the memory block at p. */
2063static void
2064_PyObject_DebugDumpAddress(const void *p)
2065{
2066    const uint8_t *q = (const uint8_t *)p;
2067    const uint8_t *tail;
2068    size_t nbytes, serial;
2069    int i;
2070    int ok;
2071    char id;
2072
2073    fprintf(stderr, "Debug memory block at address p=%p:", p);
2074    if (p == NULL) {
2075        fprintf(stderr, "\n");
2076        return;
2077    }
2078    id = (char)q[-SST];
2079    fprintf(stderr, " API '%c'\n", id);
2080
2081    nbytes = read_size_t(q - 2*SST);
2082    fprintf(stderr, "    %" PY_FORMAT_SIZE_T "u bytes originally "
2083                    "requested\n", nbytes);
2084
2085    /* In case this is nuts, check the leading pad bytes first. */
2086    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
2087    ok = 1;
2088    for (i = 1; i <= SST-1; ++i) {
2089        if (*(q-i) != FORBIDDENBYTE) {
2090            ok = 0;
2091            break;
2092        }
2093    }
2094    if (ok)
2095        fputs("FORBIDDENBYTE, as expected.\n", stderr);
2096    else {
2097        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2098            FORBIDDENBYTE);
2099        for (i = SST-1; i >= 1; --i) {
2100            const uint8_t byte = *(q-i);
2101            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
2102            if (byte != FORBIDDENBYTE)
2103                fputs(" *** OUCH", stderr);
2104            fputc('\n', stderr);
2105        }
2106
2107        fputs("    Because memory is corrupted at the start, the "
2108              "count of bytes requested\n"
2109              "       may be bogus, and checking the trailing pad "
2110              "bytes may segfault.\n", stderr);
2111    }
2112
2113    tail = q + nbytes;
2114    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, tail);
2115    ok = 1;
2116    for (i = 0; i < SST; ++i) {
2117        if (tail[i] != FORBIDDENBYTE) {
2118            ok = 0;
2119            break;
2120        }
2121    }
2122    if (ok)
2123        fputs("FORBIDDENBYTE, as expected.\n", stderr);
2124    else {
2125        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2126                FORBIDDENBYTE);
2127        for (i = 0; i < SST; ++i) {
2128            const uint8_t byte = tail[i];
2129            fprintf(stderr, "        at tail+%d: 0x%02x",
2130                    i, byte);
2131            if (byte != FORBIDDENBYTE)
2132                fputs(" *** OUCH", stderr);
2133            fputc('\n', stderr);
2134        }
2135    }
2136
2137    serial = read_size_t(tail + SST);
2138    fprintf(stderr, "    The block was made by call #%" PY_FORMAT_SIZE_T
2139                    "u to debug malloc/realloc.\n", serial);
2140
2141    if (nbytes > 0) {
2142        i = 0;
2143        fputs("    Data at p:", stderr);
2144        /* print up to 8 bytes at the start */
2145        while (q < tail && i < 8) {
2146            fprintf(stderr, " %02x", *q);
2147            ++i;
2148            ++q;
2149        }
2150        /* and up to 8 at the end */
2151        if (q < tail) {
2152            if (tail - q > 8) {
2153                fputs(" ...", stderr);
2154                q = tail - 8;
2155            }
2156            while (q < tail) {
2157                fprintf(stderr, " %02x", *q);
2158                ++q;
2159            }
2160        }
2161        fputc('\n', stderr);
2162    }
2163    fputc('\n', stderr);
2164
2165    fflush(stderr);
2166    _PyMem_DumpTraceback(fileno(stderr), p);
2167}
2168
2169
2170static size_t
2171printone(FILE *out, const char* msg, size_t value)
2172{
2173    int i, k;
2174    char buf[100];
2175    size_t origvalue = value;
2176
2177    fputs(msg, out);
2178    for (i = (int)strlen(msg); i < 35; ++i)
2179        fputc(' ', out);
2180    fputc('=', out);
2181
2182    /* Write the value with commas. */
2183    i = 22;
2184    buf[i--] = '\0';
2185    buf[i--] = '\n';
2186    k = 3;
2187    do {
2188        size_t nextvalue = value / 10;
2189        unsigned int digit = (unsigned int)(value - nextvalue * 10);
2190        value = nextvalue;
2191        buf[i--] = (char)(digit + '0');
2192        --k;
2193        if (k == 0 && value && i >= 0) {
2194            k = 3;
2195            buf[i--] = ',';
2196        }
2197    } while (value && i >= 0);
2198
2199    while (i >= 0)
2200        buf[i--] = ' ';
2201    fputs(buf, out);
2202
2203    return origvalue;
2204}
2205
2206void
2207_PyDebugAllocatorStats(FILE *out,
2208                       const char *block_name, int num_blocks, size_t sizeof_block)
2209{
2210    char buf1[128];
2211    char buf2[128];
2212    PyOS_snprintf(buf1, sizeof(buf1),
2213                  "%d %ss * %" PY_FORMAT_SIZE_T "d bytes each",
2214                  num_blocks, block_name, sizeof_block);
2215    PyOS_snprintf(buf2, sizeof(buf2),
2216                  "%48s ", buf1);
2217    (void)printone(out, buf2, num_blocks * sizeof_block);
2218}
2219
2220
2221#ifdef WITH_PYMALLOC
2222
2223#ifdef Py_DEBUG
2224/* Is target in the list?  The list is traversed via the nextpool pointers.
2225 * The list may be NULL-terminated, or circular.  Return 1 if target is in
2226 * list, else 0.
2227 */
2228static int
2229pool_is_in_list(const poolp target, poolp list)
2230{
2231    poolp origlist = list;
2232    assert(target != NULL);
2233    if (list == NULL)
2234        return 0;
2235    do {
2236        if (target == list)
2237            return 1;
2238        list = list->nextpool;
2239    } while (list != NULL && list != origlist);
2240    return 0;
2241}
2242#endif
2243
2244/* Print summary info to "out" about the state of pymalloc's structures.
2245 * In Py_DEBUG mode, also perform some expensive internal consistency
2246 * checks.
2247 */
2248void
2249_PyObject_DebugMallocStats(FILE *out)
2250{
2251    uint i;
2252    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
2253    /* # of pools, allocated blocks, and free blocks per class index */
2254    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2255    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2256    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2257    /* total # of allocated bytes in used and full pools */
2258    size_t allocated_bytes = 0;
2259    /* total # of available bytes in used pools */
2260    size_t available_bytes = 0;
2261    /* # of free pools + pools not yet carved out of current arena */
2262    uint numfreepools = 0;
2263    /* # of bytes for arena alignment padding */
2264    size_t arena_alignment = 0;
2265    /* # of bytes in used and full pools used for pool_headers */
2266    size_t pool_header_bytes = 0;
2267    /* # of bytes in used and full pools wasted due to quantization,
2268     * i.e. the necessarily leftover space at the ends of used and
2269     * full pools.
2270     */
2271    size_t quantization = 0;
2272    /* # of arenas actually allocated. */
2273    size_t narenas = 0;
2274    /* running total -- should equal narenas * ARENA_SIZE */
2275    size_t total;
2276    char buf[128];
2277
2278    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
2279            SMALL_REQUEST_THRESHOLD, numclasses);
2280
2281    for (i = 0; i < numclasses; ++i)
2282        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
2283
2284    /* Because full pools aren't linked to from anything, it's easiest
2285     * to march over all the arenas.  If we're lucky, most of the memory
2286     * will be living in full pools -- would be a shame to miss them.
2287     */
2288    for (i = 0; i < maxarenas; ++i) {
2289        uint j;
2290        uintptr_t base = arenas[i].address;
2291
2292        /* Skip arenas which are not allocated. */
2293        if (arenas[i].address == (uintptr_t)NULL)
2294            continue;
2295        narenas += 1;
2296
2297        numfreepools += arenas[i].nfreepools;
2298
2299        /* round up to pool alignment */
2300        if (base & (uintptr_t)POOL_SIZE_MASK) {
2301            arena_alignment += POOL_SIZE;
2302            base &= ~(uintptr_t)POOL_SIZE_MASK;
2303            base += POOL_SIZE;
2304        }
2305
2306        /* visit every pool in the arena */
2307        assert(base <= (uintptr_t) arenas[i].pool_address);
2308        for (j = 0; base < (uintptr_t) arenas[i].pool_address;
2309             ++j, base += POOL_SIZE) {
2310            poolp p = (poolp)base;
2311            const uint sz = p->szidx;
2312            uint freeblocks;
2313
2314            if (p->ref.count == 0) {
2315                /* currently unused */
2316#ifdef Py_DEBUG
2317                assert(pool_is_in_list(p, arenas[i].freepools));
2318#endif
2319                continue;
2320            }
2321            ++numpools[sz];
2322            numblocks[sz] += p->ref.count;
2323            freeblocks = NUMBLOCKS(sz) - p->ref.count;
2324            numfreeblocks[sz] += freeblocks;
2325#ifdef Py_DEBUG
2326            if (freeblocks > 0)
2327                assert(pool_is_in_list(p, usedpools[sz + sz]));
2328#endif
2329        }
2330    }
2331    assert(narenas == narenas_currently_allocated);
2332
2333    fputc('\n', out);
2334    fputs("class   size   num pools   blocks in use  avail blocks\n"
2335          "-----   ----   ---------   -------------  ------------\n",
2336          out);
2337
2338    for (i = 0; i < numclasses; ++i) {
2339        size_t p = numpools[i];
2340        size_t b = numblocks[i];
2341        size_t f = numfreeblocks[i];
2342        uint size = INDEX2SIZE(i);
2343        if (p == 0) {
2344            assert(b == 0 && f == 0);
2345            continue;
2346        }
2347        fprintf(out, "%5u %6u "
2348                        "%11" PY_FORMAT_SIZE_T "u "
2349                        "%15" PY_FORMAT_SIZE_T "u "
2350                        "%13" PY_FORMAT_SIZE_T "u\n",
2351                i, size, p, b, f);
2352        allocated_bytes += b * size;
2353        available_bytes += f * size;
2354        pool_header_bytes += p * POOL_OVERHEAD;
2355        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
2356    }
2357    fputc('\n', out);
2358    if (_PyMem_DebugEnabled())
2359        (void)printone(out, "# times object malloc called", serialno);
2360    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
2361    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
2362    (void)printone(out, "# arenas highwater mark", narenas_highwater);
2363    (void)printone(out, "# arenas allocated current", narenas);
2364
2365    PyOS_snprintf(buf, sizeof(buf),
2366        "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
2367        narenas, ARENA_SIZE);
2368    (void)printone(out, buf, narenas * ARENA_SIZE);
2369
2370    fputc('\n', out);
2371
2372    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
2373    total += printone(out, "# bytes in available blocks", available_bytes);
2374
2375    PyOS_snprintf(buf, sizeof(buf),
2376        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
2377    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
2378
2379    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
2380    total += printone(out, "# bytes lost to quantization", quantization);
2381    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
2382    (void)printone(out, "Total", total);
2383}
2384
2385#endif /* #ifdef WITH_PYMALLOC */
2386