1#include "Python.h" 2 3#include <stdbool.h> 4 5 6/* Defined in tracemalloc.c */ 7extern void _PyMem_DumpTraceback(int fd, const void *ptr); 8 9 10/* Python's malloc wrappers (see pymem.h) */ 11 12#undef uint 13#define uint unsigned int /* assuming >= 16 bits */ 14 15/* Forward declaration */ 16static void* _PyMem_DebugRawMalloc(void *ctx, size_t size); 17static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize); 18static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size); 19static void _PyMem_DebugRawFree(void *ctx, void *p); 20 21static void* _PyMem_DebugMalloc(void *ctx, size_t size); 22static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize); 23static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size); 24static void _PyMem_DebugFree(void *ctx, void *p); 25 26static void _PyObject_DebugDumpAddress(const void *p); 27static void _PyMem_DebugCheckAddress(char api_id, const void *p); 28 29#if defined(__has_feature) /* Clang */ 30 #if __has_feature(address_sanitizer) /* is ASAN enabled? */ 31 #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \ 32 __attribute__((no_address_safety_analysis)) 33 #else 34 #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS 35 #endif 36#else 37 #if defined(__SANITIZE_ADDRESS__) /* GCC 4.8.x, is ASAN enabled? */ 38 #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \ 39 __attribute__((no_address_safety_analysis)) 40 #else 41 #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS 42 #endif 43#endif 44 45#ifdef WITH_PYMALLOC 46 47#ifdef MS_WINDOWS 48# include <windows.h> 49#elif defined(HAVE_MMAP) 50# include <sys/mman.h> 51# ifdef MAP_ANONYMOUS 52# define ARENAS_USE_MMAP 53# endif 54#endif 55 56/* Forward declaration */ 57static void* _PyObject_Malloc(void *ctx, size_t size); 58static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize); 59static void _PyObject_Free(void *ctx, void *p); 60static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size); 61#endif 62 63 64static void * 65_PyMem_RawMalloc(void *ctx, size_t size) 66{ 67 /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL 68 for malloc(0), which would be treated as an error. Some platforms would 69 return a pointer with no memory behind it, which would break pymalloc. 70 To solve these problems, allocate an extra byte. */ 71 if (size == 0) 72 size = 1; 73 return malloc(size); 74} 75 76static void * 77_PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize) 78{ 79 /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL 80 for calloc(0, 0), which would be treated as an error. Some platforms 81 would return a pointer with no memory behind it, which would break 82 pymalloc. To solve these problems, allocate an extra byte. 
    if (nelem == 0 || elsize == 0) {
        nelem = 1;
        elsize = 1;
    }
    return calloc(nelem, elsize);
}

static void *
_PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
{
    if (size == 0)
        size = 1;
    return realloc(ptr, size);
}

static void
_PyMem_RawFree(void *ctx, void *ptr)
{
    free(ptr);
}


#ifdef MS_WINDOWS
static void *
_PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
{
    return VirtualAlloc(NULL, size,
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
}

static void
_PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
{
    VirtualFree(ptr, 0, MEM_RELEASE);
}

#elif defined(ARENAS_USE_MMAP)
static void *
_PyObject_ArenaMmap(void *ctx, size_t size)
{
    void *ptr;
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED)
        return NULL;
    assert(ptr != NULL);
    return ptr;
}

static void
_PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
{
    munmap(ptr, size);
}

#else
static void *
_PyObject_ArenaMalloc(void *ctx, size_t size)
{
    return malloc(size);
}

static void
_PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
{
    free(ptr);
}
#endif


#define PYRAW_FUNCS _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree
#ifdef WITH_PYMALLOC
#  define PYOBJ_FUNCS _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free
#else
#  define PYOBJ_FUNCS PYRAW_FUNCS
#endif
#define PYMEM_FUNCS PYOBJ_FUNCS

typedef struct {
    /* We tag each block with an API ID in order to detect API violations. */
    char api_id;
    PyMemAllocatorEx alloc;
} debug_alloc_api_t;
static struct {
    debug_alloc_api_t raw;
    debug_alloc_api_t mem;
    debug_alloc_api_t obj;
} _PyMem_Debug = {
    {'r', {NULL, PYRAW_FUNCS}},
    {'m', {NULL, PYMEM_FUNCS}},
    {'o', {NULL, PYOBJ_FUNCS}}
    };

#define PYRAWDBG_FUNCS \
    _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree
#define PYDBG_FUNCS \
    _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree

static PyMemAllocatorEx _PyMem_Raw = {
#ifdef Py_DEBUG
    &_PyMem_Debug.raw, PYRAWDBG_FUNCS
#else
    NULL, PYRAW_FUNCS
#endif
    };

static PyMemAllocatorEx _PyMem = {
#ifdef Py_DEBUG
    &_PyMem_Debug.mem, PYDBG_FUNCS
#else
    NULL, PYMEM_FUNCS
#endif
    };

static PyMemAllocatorEx _PyObject = {
#ifdef Py_DEBUG
    &_PyMem_Debug.obj, PYDBG_FUNCS
#else
    NULL, PYOBJ_FUNCS
#endif
    };

int
_PyMem_SetupAllocators(const char *opt)
{
    if (opt == NULL || *opt == '\0') {
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
           options): use default allocators */
#ifdef Py_DEBUG
# ifdef WITH_PYMALLOC
        opt = "pymalloc_debug";
# else
        opt = "malloc_debug";
# endif
#else
        /* !Py_DEBUG */
# ifdef WITH_PYMALLOC
        opt = "pymalloc";
# else
        opt = "malloc";
# endif
#endif
    }

    if (strcmp(opt, "debug") == 0) {
        PyMem_SetupDebugHooks();
    }
    else if (strcmp(opt, "malloc") == 0 || strcmp(opt, "malloc_debug") == 0)
    {
        PyMemAllocatorEx alloc = {NULL, PYRAW_FUNCS};

        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);

        if (strcmp(opt, "malloc_debug") == 0)
            PyMem_SetupDebugHooks();
    }
#ifdef WITH_PYMALLOC
    else if (strcmp(opt, "pymalloc") == 0
             || strcmp(opt, "pymalloc_debug") == 0)
    {
        PyMemAllocatorEx raw_alloc = {NULL, PYRAW_FUNCS};
        PyMemAllocatorEx mem_alloc = {NULL, PYMEM_FUNCS};
        PyMemAllocatorEx obj_alloc = {NULL, PYOBJ_FUNCS};

        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &raw_alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &mem_alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &obj_alloc);

        if (strcmp(opt, "pymalloc_debug") == 0)
            PyMem_SetupDebugHooks();
    }
#endif
    else {
        /* unknown allocator */
        return -1;
    }
    return 0;
}

#undef PYRAW_FUNCS
#undef PYMEM_FUNCS
#undef PYOBJ_FUNCS
#undef PYRAWDBG_FUNCS
#undef PYDBG_FUNCS

static PyObjectArenaAllocator _PyObject_Arena = {NULL,
#ifdef MS_WINDOWS
    _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
#elif defined(ARENAS_USE_MMAP)
    _PyObject_ArenaMmap, _PyObject_ArenaMunmap
#else
    _PyObject_ArenaMalloc, _PyObject_ArenaFree
#endif
    };

#ifdef WITH_PYMALLOC
static int
_PyMem_DebugEnabled(void)
{
    return (_PyObject.malloc == _PyMem_DebugMalloc);
}

int
_PyMem_PymallocEnabled(void)
{
    if (_PyMem_DebugEnabled()) {
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
    }
    else {
        return (_PyObject.malloc == _PyObject_Malloc);
    }
}
#endif

void
PyMem_SetupDebugHooks(void)
{
    PyMemAllocatorEx alloc;

    alloc.malloc = _PyMem_DebugRawMalloc;
    alloc.calloc = _PyMem_DebugRawCalloc;
    alloc.realloc = _PyMem_DebugRawRealloc;
    alloc.free = _PyMem_DebugRawFree;

    if (_PyMem_Raw.malloc != _PyMem_DebugRawMalloc) {
        alloc.ctx = &_PyMem_Debug.raw;
        PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
    }

    alloc.malloc = _PyMem_DebugMalloc;
    alloc.calloc = _PyMem_DebugCalloc;
    alloc.realloc = _PyMem_DebugRealloc;
    alloc.free = _PyMem_DebugFree;

    if (_PyMem.malloc != _PyMem_DebugMalloc) {
        alloc.ctx = &_PyMem_Debug.mem;
        PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
    }

    if (_PyObject.malloc != _PyMem_DebugMalloc) {
        alloc.ctx = &_PyMem_Debug.obj;
        PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
    }
}

void
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{
    switch(domain)
    {
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
    default:
        /* unknown domain: set all attributes to NULL */
        allocator->ctx = NULL;
        allocator->malloc = NULL;
        allocator->calloc = NULL;
        allocator->realloc = NULL;
        allocator->free = NULL;
    }
}

void
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{
    switch(domain)
    {
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
    /* ignore unknown domain */
    }
}

void
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
{
    *allocator = _PyObject_Arena;
}

void
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
{
    _PyObject_Arena = *allocator;
}
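
/*
 * Example (illustrative sketch, not part of this file): the PEP 445 functions
 * above are all an embedder needs to hook an allocator domain. The sketch
 * below wraps the "raw" domain with a counting allocator; the names hook_t,
 * h_malloc, h_calloc, h_realloc, h_free and install_hook are hypothetical,
 * and it is assumed to run before Py_Initialize(). Note that all four
 * functions must be replaced together, because the saved ctx travels with
 * the whole PyMemAllocatorEx structure.
 *
 *     typedef struct { PyMemAllocatorEx orig; size_t nalloc; } hook_t;
 *     static hook_t hook;
 *
 *     static void *h_malloc(void *ctx, size_t n)
 *     { hook_t *h = ctx; h->nalloc++; return h->orig.malloc(h->orig.ctx, n); }
 *     static void *h_calloc(void *ctx, size_t ne, size_t sz)
 *     { hook_t *h = ctx; h->nalloc++; return h->orig.calloc(h->orig.ctx, ne, sz); }
 *     static void *h_realloc(void *ctx, void *p, size_t n)
 *     { hook_t *h = ctx; return h->orig.realloc(h->orig.ctx, p, n); }
 *     static void h_free(void *ctx, void *p)
 *     { hook_t *h = ctx; h->orig.free(h->orig.ctx, p); }
 *
 *     static void install_hook(void)   // call before Py_Initialize()
 *     {
 *         PyMemAllocatorEx a = {&hook, h_malloc, h_calloc, h_realloc, h_free};
 *         PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &hook.orig);
 *         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &a);
 *     }
 *
 * The same effect for debugging purposes can be obtained without code via the
 * PYTHONMALLOC environment variable parsed by _PyMem_SetupAllocators() above
 * (e.g. PYTHONMALLOC=debug or PYTHONMALLOC=malloc_debug).
 */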

void *
PyMem_RawMalloc(size_t size)
{
    /*
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
     * Most python internals blindly use a signed Py_ssize_t to track
     * things without checking for overflows or negatives.
     * As size_t is unsigned, checking for size < 0 is not required.
     */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
}

void *
PyMem_RawCalloc(size_t nelem, size_t elsize)
{
    /* see PyMem_RawMalloc() */
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
        return NULL;
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
}

void*
PyMem_RawRealloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
}

void
PyMem_RawFree(void *ptr)
{
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
}

void *
PyMem_Malloc(size_t size)
{
    /* see PyMem_RawMalloc() */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem.malloc(_PyMem.ctx, size);
}

void *
PyMem_Calloc(size_t nelem, size_t elsize)
{
    /* see PyMem_RawMalloc() */
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
        return NULL;
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
}

void *
PyMem_Realloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
}

void
PyMem_Free(void *ptr)
{
    _PyMem.free(_PyMem.ctx, ptr);
}

char *
_PyMem_RawStrdup(const char *str)
{
    size_t size;
    char *copy;

    size = strlen(str) + 1;
    copy = PyMem_RawMalloc(size);
    if (copy == NULL)
        return NULL;
    memcpy(copy, str, size);
    return copy;
}

char *
_PyMem_Strdup(const char *str)
{
    size_t size;
    char *copy;

    size = strlen(str) + 1;
    copy = PyMem_Malloc(size);
    if (copy == NULL)
        return NULL;
    memcpy(copy, str, size);
    return copy;
}

void *
PyObject_Malloc(size_t size)
{
    /* see PyMem_RawMalloc() */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyObject.malloc(_PyObject.ctx, size);
}

void *
PyObject_Calloc(size_t nelem, size_t elsize)
{
    /* see PyMem_RawMalloc() */
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
        return NULL;
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
}

void *
PyObject_Realloc(void *ptr, size_t new_size)
{
    /* see PyMem_RawMalloc() */
    if (new_size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
}

void
PyObject_Free(void *ptr)
{
    _PyObject.free(_PyObject.ctx, ptr);
}

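
/*
 * A note on the guards above (illustrative only, matching the checks in
 * PyMem_RawCalloc(), PyMem_Calloc() and PyObject_Calloc()): computing
 * nelem * elsize directly can wrap around, so the wrappers test the division
 * form instead, which cannot overflow:
 *
 *     // hypothetical helper, not used by this file
 *     static int
 *     request_fits(size_t nelem, size_t elsize)
 *     {
 *         return elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize;
 *     }
 *
 * For example, on a 64-bit build, nelem = elsize = (size_t)1 << 32 makes
 * nelem * elsize wrap around to 0, while request_fits() correctly reports
 * that the request exceeds PY_SSIZE_T_MAX.
 */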

#ifdef WITH_PYMALLOC

#ifdef WITH_VALGRIND
#include <valgrind/valgrind.h>

/* If we're using GCC, use __builtin_expect() to reduce overhead of
   the valgrind checks */
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
#  define UNLIKELY(value) __builtin_expect((value), 0)
#else
#  define UNLIKELY(value) (value)
#endif

/* -1 indicates that we haven't checked that we're running on valgrind yet. */
static int running_on_valgrind = -1;
#endif

/* An object allocator for Python.

   Here is an introduction to the layers of the Python memory architecture,
   showing where the object allocator is actually used (layer +2). It is
   called for every object allocation and deallocation (PyObject_New/Del),
   unless the object-specific allocators implement a proprietary allocation
   scheme (ex.: ints use a simple free list). This is also the place where
   the cyclic garbage collector operates selectively on container objects.


    Object-specific allocators
        _____   ______   ______       ________
       [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
     _______________________________      |                           |
    [   Python's object allocator   ]     |                           |
+2 | ####### Object memory ####### | <------ Internal buffers ------> |
     ______________________________________________________________   |
    [          Python's raw memory allocator (PyMem_ API)          ]  |
+1 | <----- Python memory (under PyMem manager's control) ------> |   |
     __________________________________________________________________
    [    Underlying general-purpose allocator (ex: C library malloc)   ]
 0 | <------ Virtual memory allocated for the python process -------> |

   =========================================================================
     _______________________________________________________________________
    [                OS-specific Virtual Memory Manager (VMM)               ]
-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
     __________________________________   __________________________________
    [                                  ] [                                  ]
-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |

*/
/*==========================================================================*/

/* A fast, special-purpose memory allocator for small blocks, to be used
   on top of a general-purpose malloc -- heavily based on previous art. */

/* Vladimir Marangozov -- August 2000 */

/*
 * "Memory management is where the rubber meets the road -- if we do the wrong
 * thing at any level, the results will not be good. And if we don't make the
 * levels work well together, we are in serious trouble." (1)
 *
 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
 *     "Dynamic Storage Allocation: A Survey and Critical Review",
 *     in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
 */

/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks */

/*==========================================================================*/

/*
 * Allocation strategy abstract:
 *
 * For small requests, the allocator sub-allocates <Big> blocks of memory.
 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
 * system's allocator.
 *
 * Small requests are grouped in size classes spaced 8 bytes apart, due
 * to the required valid alignment of the returned address. Requests of
 * a particular size are serviced from memory pools of 4K (one VMM page).
 * Pools are fragmented on demand and contain free lists of blocks of one
 * particular size class. In other words, there is a fixed-size allocator
 * for each size class. Free pools are shared by the different allocators
 * thus minimizing the space reserved for a particular size class.
 *
 * This allocation strategy is a variant of what is known as "simple
 * segregated storage based on array of free lists".
 * The main drawback of simple segregated storage is that we might end up
 * with a lot of reserved memory for the different free lists, which
 * degenerate over time. To avoid this, we partition each free list in pools
 * and we share dynamically the reserved space between all free lists.
 * This technique is quite efficient for memory-intensive programs which
 * allocate mainly small-sized blocks.
 *
 * For small requests we have the following table:
 *
 * Request in bytes     Size of allocated block      Size class idx
 * ----------------------------------------------------------------
 *        1-8                     8                       0
 *        9-16                   16                       1
 *       17-24                   24                       2
 *       25-32                   32                       3
 *       33-40                   40                       4
 *       41-48                   48                       5
 *       49-56                   56                       6
 *       57-64                   64                       7
 *       65-72                   72                       8
 *        ...                   ...                     ...
 *      497-504                 504                      62
 *      505-512                 512                      63
 *
 *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
 *      allocator.
 */

/*==========================================================================*/

/*
 * -- Main tunable settings section --
 */

/*
 * Alignment of addresses returned to the user. 8-bytes alignment works
 * on most current architectures (with 32-bit or 64-bit address busses).
 * The alignment value is also used for grouping small requests in size
 * classes spaced ALIGNMENT bytes apart.
 *
 * You shouldn't change this unless you know what you are doing.
 */
#define ALIGNMENT               8               /* must be 2^N */
#define ALIGNMENT_SHIFT         3

/* Return the number of bytes in size class I, as a uint. */
#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)

/*
 * Max size threshold below which malloc requests are considered to be
 * small enough in order to use preallocated memory pools. You can tune
 * this value according to your application behaviour and memory needs.
 *
 * Note: a size threshold of 512 guarantees that newly created dictionaries
 * will be allocated from preallocated memory pools on 64-bit.
 *
 * The following invariants must hold:
 *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
 *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
 *
 * Although not required, for better performance and space efficiency,
 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
 */
#define SMALL_REQUEST_THRESHOLD 512
#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)

/*
 * The system's VMM page size can be obtained on most unices with a
 * getpagesize() call or deduced from various header files. To make
 * things simpler, we assume that it is 4K, which is OK for most systems.
 * It is probably better if this is the native page size, but it doesn't
 * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page
 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
 * violation fault. 4K is apparently OK for all the platforms that python
 * currently targets.
 */
#define SYSTEM_PAGE_SIZE        (4 * 1024)
#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)

/*
 * Maximum amount of memory managed by the allocator for small requests.
 */
#ifdef WITH_MEMORY_LIMITS
#ifndef SMALL_MEMORY_LIMIT
#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
#endif
#endif

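/*
 * Illustration (matches the table and macros above; the variable names here
 * are only for exposition): the size class index for a small request of
 * nbytes (1 <= nbytes <= SMALL_REQUEST_THRESHOLD) is computed as in
 * _PyObject_Alloc() below, and INDEX2SIZE() maps the index back to the
 * rounded-up block size:
 *
 *     uint idx = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;   // e.g. 20 -> 2
 *     uint blocksize = INDEX2SIZE(idx);                   // e.g. 2 -> 24
 *
 * So a 20-byte request is served from the 24-byte size class (idx 2), and
 * any request in 505..512 lands in the last class (idx 63, 512 bytes).
 */
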
/*
 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
 * on a page boundary. This is a reserved virtual address space for the
 * current process (obtained through a malloc()/mmap() call). In no way does
 * this mean that the memory arenas will be used entirely. A malloc(<Big>) is
 * usually an address range reservation for <Big> bytes, unless all pages within
 * this space are referenced subsequently. So malloc'ing big blocks and not
 * using them does not mean "wasting memory". It's an addressable range
 * wastage...
 *
 * Arenas are allocated with mmap() on systems supporting anonymous memory
 * mappings to reduce heap fragmentation.
 */
#define ARENA_SIZE              (256 << 10)     /* 256KB */

#ifdef WITH_MEMORY_LIMITS
#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
#endif

/*
 * Size of the pools used for small blocks. Should be a power of 2,
 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
 */
#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK

/*
 * -- End of tunable settings section --
 */

/*==========================================================================*/

/*
 * Locking
 *
 * To reduce lock contention, it would probably be better to refine the
 * crude function locking with per size class locking. I'm not positive
 * however, whether it's worth switching to such locking policy because
 * of the performance penalty it might introduce.
 *
 * The following macros describe the simplest (should also be the fastest)
 * lock object on a particular platform and the init/fini/lock/unlock
 * operations on it. The locks defined here are not expected to be recursive
 * because it is assumed that they will always be called in the order:
 * INIT, [LOCK, UNLOCK]*, FINI.
 */

/*
 * Python's threads are serialized, so object malloc locking is disabled.
 */
#define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
#define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
#define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
#define SIMPLELOCK_LOCK(lock)   /* acquire released lock                */
#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock                */

/* When you say memory, my mind reasons in terms of (pointers to) blocks */
typedef uint8_t block;

/* Pool for small blocks. */
struct pool_header {
    union { block *_padding;
            uint count; } ref;          /* number of allocated blocks    */
    block *freeblock;                   /* pool's free list head         */
    struct pool_header *nextpool;       /* next pool of this size class  */
    struct pool_header *prevpool;       /* previous pool       ""        */
    uint arenaindex;                    /* index into arenas of base adr */
    uint szidx;                         /* block size class index        */
    uint nextoffset;                    /* bytes to virgin block         */
    uint maxnextoffset;                 /* largest valid nextoffset      */
};

typedef struct pool_header *poolp;

/* Record keeping for arenas. */
struct arena_object {
    /* The address of the arena, as returned by malloc. Note that 0
     * will never be returned by a successful malloc, and is used
     * here to mark an arena_object that doesn't correspond to an
     * allocated arena.
     */
    uintptr_t address;

    /* Pool-aligned pointer to the next pool to be carved off. */
    block* pool_address;

    /* The number of available pools in the arena: free pools + never-
     * allocated pools.
     */
    uint nfreepools;

    /* The total number of pools in the arena, whether or not available. */
    uint ntotalpools;

    /* Singly-linked list of available pools. */
    struct pool_header* freepools;

    /* Whenever this arena_object is not associated with an allocated
     * arena, the nextarena member is used to link all unassociated
     * arena_objects in the singly-linked `unused_arena_objects` list.
     * The prevarena member is unused in this case.
     *
     * When this arena_object is associated with an allocated arena
     * with at least one available pool, both members are used in the
     * doubly-linked `usable_arenas` list, which is maintained in
     * increasing order of `nfreepools` values.
     *
     * Else this arena_object is associated with an allocated arena
     * all of whose pools are in use. `nextarena` and `prevarena`
     * are both meaningless in this case.
     */
    struct arena_object* nextarena;
    struct arena_object* prevarena;
};

#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)

#define DUMMY_SIZE_IDX  0xffff  /* size class of newly cached pools */

/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))

/* Return total number of blocks in pool of size index I, as a uint. */
#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))

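/*
 * Illustration (assuming the 4K POOL_SIZE above and a 64-bit build where
 * POOL_OVERHEAD comes out to 48 bytes): POOL_ADDR() just masks away the low
 * bits of a block address to recover the pool_header at the start of its 4K
 * pool, and NUMBLOCKS() says how many blocks of a size class fit after that
 * header:
 *
 *     poolp pool = POOL_ADDR(p);    // conceptually p & ~(uintptr_t)POOL_SIZE_MASK
 *     uint nsmall = NUMBLOCKS(0);   // (4096 - 48) / 8   == 506 blocks of 8 bytes
 *     uint nbig   = NUMBLOCKS(63);  // (4096 - 48) / 512 == 7 blocks of 512 bytes
 *
 * This only works because pools are carved out of arenas at pool-aligned
 * addresses (see new_arena() below).
 */
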
/*==========================================================================*/

/*
 * This malloc lock
 */
SIMPLELOCK_DECL(_malloc_lock)
#define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
#define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
#define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
#define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)

/*
 * Pool table -- headed, circular, doubly-linked lists of partially used pools.

This is involved. For an index i, usedpools[i+i] is the header for a list of
all partially used pools holding small blocks with "size class idx" i. So
usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.

Pools are carved off an arena's highwater mark (an arena_object's pool_address
member) as needed. Once carved off, a pool is in one of three states forever
after:

used == partially used, neither empty nor full
    At least one block in the pool is currently allocated, and at least one
    block in the pool is not currently allocated (note this implies a pool
    has room for at least two blocks).
    This is a pool's initial state, as a pool is created only when malloc
    needs space.
    The pool holds blocks of a fixed size, and is in the circular list headed
    at usedpools[i] (see above). It's linked to the other used pools of the
    same size class via the pool_header's nextpool and prevpool members.
    If all but one block is currently allocated, a malloc can cause a
    transition to the full state. If all but one block is not currently
    allocated, a free can cause a transition to the empty state.

full == all the pool's blocks are currently allocated
    On transition to full, a pool is unlinked from its usedpools[] list.
    It's not linked to from anything anymore, and its nextpool and
    prevpool members are meaningless until it transitions back to used.
    A free of a block in a full pool puts the pool back in the used state.
    Then it's linked in at the front of the appropriate usedpools[] list, so
    that the next allocation for its size class will reuse the freed block.

empty == all the pool's blocks are currently available for allocation
    On transition to empty, a pool is unlinked from its usedpools[] list,
    and linked to the front of its arena_object's singly-linked freepools list,
    via its nextpool member. The prevpool member has no meaning in this case.
    Empty pools have no inherent size class: the next time a malloc finds
    an empty list in usedpools[], it takes the first pool off of freepools.
    If the size class needed happens to be the same as the size class the pool
    last had, some pool initialization can be skipped.


Block Management

Blocks within pools are again carved out as needed. pool->freeblock points to
the start of a singly-linked list of free blocks within the pool. When a
block is freed, it's inserted at the front of its pool's freeblock list. Note
that the available blocks in a pool are *not* linked all together when a pool
is initialized. Instead only "the first two" (lowest addresses) blocks are
set up, returning the first such block, and setting pool->freeblock to a
one-block list holding the second such block. This is consistent with the
fact that pymalloc strives at all levels (arena, pool, and block) never to
touch a piece of memory until it's actually needed.

So long as a pool is in the used state, we're certain there *is* a block
available for allocating, and pool->freeblock is not NULL. If pool->freeblock
points to the end of the free list before we've carved the entire pool into
blocks, that means we simply haven't yet gotten to one of the higher-address
blocks. The offset from the pool_header to the start of "the next" virgin
block is stored in the pool_header nextoffset member, and the largest value
of nextoffset that makes sense is stored in the maxnextoffset member when a
pool is initialized. All the blocks in a pool have been passed out at least
once when and only when nextoffset > maxnextoffset.


Major obscurity: While the usedpools vector is declared to have poolp
entries, it doesn't really. It really contains two pointers per (conceptual)
poolp entry, the nextpool and prevpool members of a pool_header. The
excruciating initialization code below fools C so that

    usedpools[i+i]

"acts like" a genuine poolp, but only so long as you only reference its
nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish is
compensating for the fact that a pool_header's nextpool and prevpool members
immediately follow a pool_header's first two members:

    union { block *_padding;
            uint count; } ref;
    block *freeblock;

each of which consumes sizeof(block *) bytes. So what usedpools[i+i] really
contains is a fudged-up pointer p such that *if* C believes it's a poolp
pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
circular list is empty).

It's unclear why the usedpools setup is so convoluted. It could be to
minimize the amount of cache required to hold this heavily-referenced table
(which only *needs* the two interpool pointer members of a pool_header).
OTOH,
referencing code has to remember to "double the index" and doing so isn't
free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
on the fact that C doesn't insert any padding anywhere in a pool_header at or
before the prevpool member.
**************************************************************************** */

#define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
#define PT(x)   PTA(x), PTA(x)

static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
    PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
#if NB_SMALL_SIZE_CLASSES > 8
    , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
#if NB_SMALL_SIZE_CLASSES > 16
    , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
#if NB_SMALL_SIZE_CLASSES > 24
    , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
#if NB_SMALL_SIZE_CLASSES > 32
    , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
#if NB_SMALL_SIZE_CLASSES > 40
    , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
#if NB_SMALL_SIZE_CLASSES > 48
    , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
#if NB_SMALL_SIZE_CLASSES > 56
    , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
#if NB_SMALL_SIZE_CLASSES > 64
#error "NB_SMALL_SIZE_CLASSES should be less than 64"
#endif /* NB_SMALL_SIZE_CLASSES > 64 */
#endif /* NB_SMALL_SIZE_CLASSES > 56 */
#endif /* NB_SMALL_SIZE_CLASSES > 48 */
#endif /* NB_SMALL_SIZE_CLASSES > 40 */
#endif /* NB_SMALL_SIZE_CLASSES > 32 */
#endif /* NB_SMALL_SIZE_CLASSES > 24 */
#endif /* NB_SMALL_SIZE_CLASSES > 16 */
#endif /* NB_SMALL_SIZE_CLASSES >  8 */
};

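/*
 * Illustration of how the usedpools table above is consulted (this is the
 * lookup _PyObject_Alloc() below performs; the variable names here are only
 * for exposition):
 *
 *     uint idx = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;  // size class index
 *     poolp pool = usedpools[idx + idx];                 // note: doubled index
 *     if (pool != pool->nextpool) {
 *         // at least one partially used pool of this size class exists
 *     }
 *     else {
 *         // list header points at itself: no used pool, carve or reuse one
 *     }
 *
 * The "pool != pool->nextpool" test works because each list header is the
 * PTA() fudged pointer described above, whose nextpool and prevpool fields
 * point back at the header itself while the list is empty.
 */
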
/*==========================================================================
Arena management.

`arenas` is a vector of arena_objects. It contains maxarenas entries, some of
which may not be currently used (== they're arena_objects that aren't
currently associated with an allocated arena). Note that arenas proper are
separately malloc'ed.

Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5,
we do try to free() arenas, and use some mild heuristic strategies to increase
the likelihood that arenas eventually can be freed.

unused_arena_objects

    This is a singly-linked list of the arena_objects that are currently not
    being used (no arena is associated with them). Objects are taken off the
    head of the list in new_arena(), and are pushed on the head of the list in
    PyObject_Free() when the arena is empty. Key invariant: an arena_object
    is on this list if and only if its .address member is 0.

usable_arenas

    This is a doubly-linked list of the arena_objects associated with arenas
    that have pools available. These pools are either waiting to be reused,
    or have not been used before. The list is sorted to have the most-
    allocated arenas first (ascending order based on the nfreepools member).
    This means that the next allocation will come from a heavily used arena,
    which gives the nearly empty arenas a chance to be returned to the system.
    In my unscientific tests this dramatically improved the number of arenas
    that could be freed.

Note that an arena_object associated with an arena all of whose pools are
currently in use isn't on either list.
*/

/* Array of objects used to track chunks of memory (arenas). */
static struct arena_object* arenas = NULL;
/* Number of slots currently allocated in the `arenas` vector. */
static uint maxarenas = 0;

/* The head of the singly-linked, NULL-terminated list of available
 * arena_objects.
 */
static struct arena_object* unused_arena_objects = NULL;

/* The head of the doubly-linked, NULL-terminated at each end, list of
 * arena_objects associated with arenas that have pools available.
 */
static struct arena_object* usable_arenas = NULL;

/* How many arena_objects do we initially allocate?
 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
 * `arenas` vector.
 */
#define INITIAL_ARENA_OBJECTS 16

/* Number of arenas allocated that haven't been free()'d. */
static size_t narenas_currently_allocated = 0;

/* Total number of times malloc() called to allocate an arena. */
static size_t ntimes_arena_allocated = 0;
/* High water mark (max value ever seen) for narenas_currently_allocated. */
static size_t narenas_highwater = 0;

static Py_ssize_t _Py_AllocatedBlocks = 0;

Py_ssize_t
_Py_GetAllocatedBlocks(void)
{
    return _Py_AllocatedBlocks;
}


/* Allocate a new arena. If we run out of memory, return NULL. Else
 * allocate a new arena, and return the address of an arena_object
 * describing the new arena. It's expected that the caller will set
 * `usable_arenas` to the return value.
 */
static struct arena_object*
new_arena(void)
{
    struct arena_object* arenaobj;
    uint excess;        /* number of bytes above pool alignment */
    void *address;
    static int debug_stats = -1;

    if (debug_stats == -1) {
        char *opt = Py_GETENV("PYTHONMALLOCSTATS");
        debug_stats = (opt != NULL && *opt != '\0');
    }
    if (debug_stats)
        _PyObject_DebugMallocStats(stderr);

    if (unused_arena_objects == NULL) {
        uint i;
        uint numarenas;
        size_t nbytes;

        /* Double the number of arena objects on each allocation.
         * Note that it's possible for `numarenas` to overflow.
         */
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
        if (numarenas <= maxarenas)
            return NULL;                /* overflow */
#if SIZEOF_SIZE_T <= SIZEOF_INT
        if (numarenas > SIZE_MAX / sizeof(*arenas))
            return NULL;                /* overflow */
#endif
        nbytes = numarenas * sizeof(*arenas);
        arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
        if (arenaobj == NULL)
            return NULL;
        arenas = arenaobj;

        /* We might need to fix pointers that were copied. However,
         * new_arena only gets called when all the pages in the
         * previous arenas are full. Thus, there are *no* pointers
         * into the old array. Thus, we don't have to worry about
         * invalid pointers. Just to be sure, some asserts:
         */
        assert(usable_arenas == NULL);
        assert(unused_arena_objects == NULL);

        /* Put the new arenas on the unused_arena_objects list. */
        for (i = maxarenas; i < numarenas; ++i) {
            arenas[i].address = 0;              /* mark as unassociated */
            arenas[i].nextarena = i < numarenas - 1 ?
                                  &arenas[i+1] : NULL;
        }

        /* Update globals. */
        unused_arena_objects = &arenas[maxarenas];
        maxarenas = numarenas;
    }

    /* Take the next available arena object off the head of the list. */
    assert(unused_arena_objects != NULL);
    arenaobj = unused_arena_objects;
    unused_arena_objects = arenaobj->nextarena;
    assert(arenaobj->address == 0);
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
    if (address == NULL) {
        /* The allocation failed: return NULL after putting the
         * arenaobj back.
         */
        arenaobj->nextarena = unused_arena_objects;
        unused_arena_objects = arenaobj;
        return NULL;
    }
    arenaobj->address = (uintptr_t)address;

    ++narenas_currently_allocated;
    ++ntimes_arena_allocated;
    if (narenas_currently_allocated > narenas_highwater)
        narenas_highwater = narenas_currently_allocated;
    arenaobj->freepools = NULL;
    /* pool_address <- first pool-aligned address in the arena
       nfreepools <- number of whole pools that fit after alignment */
    arenaobj->pool_address = (block*)arenaobj->address;
    arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
    assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
    if (excess != 0) {
        --arenaobj->nfreepools;
        arenaobj->pool_address += POOL_SIZE - excess;
    }
    arenaobj->ntotalpools = arenaobj->nfreepools;

    return arenaobj;
}

/*
address_in_range(P, POOL)

Return true if and only if P is an address that was allocated by pymalloc.
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
(the caller is asked to compute this because the macro expands POOL more than
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
variable and pass the latter to the macro; because address_in_range is
called on every alloc/realloc/free, micro-efficiency is important here).

Tricky: Let B be the arena base address associated with the pool, B =
arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if

    B <= P < B + ARENA_SIZE

Subtracting B throughout, this is true iff

    0 <= P-B < ARENA_SIZE

By using unsigned arithmetic, the "0 <=" half of the test can be skipped.

Obscure: A PyMem "free memory" function can call the pymalloc free or realloc
before the first arena has been allocated. `arenas` is still NULL in that
case. We're relying on the fact that maxarenas is also 0 in that case, so
that (POOL)->arenaindex < maxarenas must be false, saving us from trying to
index into a NULL arenas.

Details: given P and POOL, the arena_object corresponding to P is AO =
arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild
stores, etc), POOL is the correct address of P's pool, AO.address is the
correct base address of the pool's arena, and P must be within ARENA_SIZE of
AO.address. In addition, AO.address is not 0 (no arena can start at address 0
(NULL)). Therefore address_in_range correctly reports that obmalloc
controls P.

Now suppose obmalloc does not control P (e.g., P was obtained via a direct
call to the system malloc() or realloc()). (POOL)->arenaindex may be anything
in this case -- it may even be uninitialized trash. If the trash arenaindex
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
control P.

Else arenaindex is < maxarenas, and AO is read up. If AO corresponds to an
allocated arena, obmalloc controls all the memory in slice AO.address :
AO.address+ARENA_SIZE.
By case assumption, P is not controlled by obmalloc,
so P doesn't lie in that slice, so the macro correctly reports that P is not
controlled by obmalloc.

Finally, if P is not controlled by obmalloc and AO corresponds to an unused
arena_object (one not currently associated with an allocated arena),
AO.address is 0, and the second test in the macro reduces to:

    P < ARENA_SIZE

If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part
of the test still passes, and the third clause (AO.address != 0) is necessary
to get the correct result: AO.address is 0 in this case, so the macro
correctly reports that P is not controlled by obmalloc (despite that P lies in
slice AO.address : AO.address + ARENA_SIZE).

Note: The third (AO.address != 0) clause was added in Python 2.5. Before
2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
corresponded to a currently-allocated arena, so the "P is not controlled by
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
was impossible.

Note that the logic is excruciating, and reading up possibly uninitialized
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
creates problems for some memory debuggers. The overwhelming advantage is
that this test determines whether an arbitrary address is controlled by
obmalloc in a small constant time, independent of the number of arenas
obmalloc controls. Since this test is needed at every entry point, it's
extremely desirable that it be this fast.
*/

static bool ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
address_in_range(void *p, poolp pool)
{
    // Since address_in_range may be reading from memory which was not allocated
    // by Python, it is important that pool->arenaindex is read only once, as
    // another thread may be concurrently modifying the value without holding
    // the GIL. The following dance forces the compiler to read pool->arenaindex
    // only once.
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
    return arenaindex < maxarenas &&
           (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
           arenas[arenaindex].address != 0;
}

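/*
 * The unsigned-compare trick used above, spelled out (the numbers are
 * examples only): with B = arenas[arenaindex].address, the single comparison
 *
 *     (uintptr_t)p - B < ARENA_SIZE
 *
 * implements B <= p < B + ARENA_SIZE. If p < B, the subtraction wraps around
 * to a huge unsigned value, which is >= ARENA_SIZE, so the test fails exactly
 * as the two-sided check would. For instance, with B = 0x10000 and
 * ARENA_SIZE = 0x40000:
 *
 *     p = 0x20000  ->  p - B = 0x10000, which is < 0x40000     -> in range
 *     p = 0x0ff00  ->  p - B wraps to a value near UINTPTR_MAX -> rejected
 */
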
/*==========================================================================*/

/* malloc. Note that nbytes==0 tries to return a non-NULL pointer, distinct
 * from all other currently live pointers. This may not be possible.
 */

/*
 * The basic blocks are ordered by decreasing execution frequency,
 * which minimizes the number of jumps in the most common cases,
 * improves branching prediction and instruction scheduling (small
 * block allocations typically result in a couple of instructions).
 * Unless the optimizer reorders everything, being too smart...
 */

static void *
_PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
{
    size_t nbytes;
    block *bp;
    poolp pool;
    poolp next;
    uint size;

    _Py_AllocatedBlocks++;

    assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize);
    nbytes = nelem * elsize;

#ifdef WITH_VALGRIND
    if (UNLIKELY(running_on_valgrind == -1))
        running_on_valgrind = RUNNING_ON_VALGRIND;
    if (UNLIKELY(running_on_valgrind))
        goto redirect;
#endif

    if (nelem == 0 || elsize == 0)
        goto redirect;

    if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
        LOCK();
        /*
         * Most frequent paths first
         */
        size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
        pool = usedpools[size + size];
        if (pool != pool->nextpool) {
            /*
             * There is a used pool for this size class.
             * Pick up the head block of its free list.
             */
            ++pool->ref.count;
            bp = pool->freeblock;
            assert(bp != NULL);
            if ((pool->freeblock = *(block **)bp) != NULL) {
                UNLOCK();
                if (use_calloc)
                    memset(bp, 0, nbytes);
                return (void *)bp;
            }
            /*
             * Reached the end of the free list, try to extend it.
             */
            if (pool->nextoffset <= pool->maxnextoffset) {
                /* There is room for another block. */
                pool->freeblock = (block*)pool +
                                  pool->nextoffset;
                pool->nextoffset += INDEX2SIZE(size);
                *(block **)(pool->freeblock) = NULL;
                UNLOCK();
                if (use_calloc)
                    memset(bp, 0, nbytes);
                return (void *)bp;
            }
            /* Pool is full, unlink from used pools. */
            next = pool->nextpool;
            pool = pool->prevpool;
            next->prevpool = pool;
            pool->nextpool = next;
            UNLOCK();
            if (use_calloc)
                memset(bp, 0, nbytes);
            return (void *)bp;
        }

        /* There isn't a pool of the right size class immediately
         * available: use a free pool.
         */
        if (usable_arenas == NULL) {
            /* No arena has a free pool: allocate a new arena. */
#ifdef WITH_MEMORY_LIMITS
            if (narenas_currently_allocated >= MAX_ARENAS) {
                UNLOCK();
                goto redirect;
            }
#endif
            usable_arenas = new_arena();
            if (usable_arenas == NULL) {
                UNLOCK();
                goto redirect;
            }
            usable_arenas->nextarena =
                usable_arenas->prevarena = NULL;
        }
        assert(usable_arenas->address != 0);

        /* Try to get a cached free pool. */
        pool = usable_arenas->freepools;
        if (pool != NULL) {
            /* Unlink from cached pools. */
            usable_arenas->freepools = pool->nextpool;

            /* This arena already had the smallest nfreepools
             * value, so decreasing nfreepools doesn't change
             * that, and we don't need to rearrange the
             * usable_arenas list. However, if the arena has
             * become wholly allocated, we need to remove its
             * arena_object from usable_arenas.
             */
            --usable_arenas->nfreepools;
            if (usable_arenas->nfreepools == 0) {
                /* Wholly allocated: remove. */
                assert(usable_arenas->freepools == NULL);
                assert(usable_arenas->nextarena == NULL ||
                       usable_arenas->nextarena->prevarena ==
                       usable_arenas);

                usable_arenas = usable_arenas->nextarena;
                if (usable_arenas != NULL) {
                    usable_arenas->prevarena = NULL;
                    assert(usable_arenas->address != 0);
                }
            }
            else {
                /* nfreepools > 0: it must be that freepools
                 * isn't NULL, or that we haven't yet carved
                 * off all the arena's pools for the first
                 * time.
                 */
                assert(usable_arenas->freepools != NULL ||
                       usable_arenas->pool_address <=
                       (block*)usable_arenas->address +
                           ARENA_SIZE - POOL_SIZE);
            }
        init_pool:
            /* Frontlink to used pools. */
            next = usedpools[size + size]; /* == prev */
            pool->nextpool = next;
            pool->prevpool = next;
            next->nextpool = pool;
            next->prevpool = pool;
            pool->ref.count = 1;
            if (pool->szidx == size) {
                /* Luckily, this pool last contained blocks
                 * of the same size class, so its header
                 * and free list are already initialized.
                 */
                bp = pool->freeblock;
                assert(bp != NULL);
                pool->freeblock = *(block **)bp;
                UNLOCK();
                if (use_calloc)
                    memset(bp, 0, nbytes);
                return (void *)bp;
            }
            /*
             * Initialize the pool header, set up the free list to
             * contain just the second block, and return the first
             * block.
             */
            pool->szidx = size;
            size = INDEX2SIZE(size);
            bp = (block *)pool + POOL_OVERHEAD;
            pool->nextoffset = POOL_OVERHEAD + (size << 1);
            pool->maxnextoffset = POOL_SIZE - size;
            pool->freeblock = bp + size;
            *(block **)(pool->freeblock) = NULL;
            UNLOCK();
            if (use_calloc)
                memset(bp, 0, nbytes);
            return (void *)bp;
        }

        /* Carve off a new pool. */
        assert(usable_arenas->nfreepools > 0);
        assert(usable_arenas->freepools == NULL);
        pool = (poolp)usable_arenas->pool_address;
        assert((block*)pool <= (block*)usable_arenas->address +
                               ARENA_SIZE - POOL_SIZE);
        pool->arenaindex = (uint)(usable_arenas - arenas);
        assert(&arenas[pool->arenaindex] == usable_arenas);
        pool->szidx = DUMMY_SIZE_IDX;
        usable_arenas->pool_address += POOL_SIZE;
        --usable_arenas->nfreepools;

        if (usable_arenas->nfreepools == 0) {
            assert(usable_arenas->nextarena == NULL ||
                   usable_arenas->nextarena->prevarena ==
                   usable_arenas);
            /* Unlink the arena: it is completely allocated. */
            usable_arenas = usable_arenas->nextarena;
            if (usable_arenas != NULL) {
                usable_arenas->prevarena = NULL;
                assert(usable_arenas->address != 0);
            }
        }

        goto init_pool;
    }

    /* The small block allocator ends here. */

redirect:
    /* Redirect the original request to the underlying (libc) allocator.
     * We jump here on bigger requests, on error in the code above (as a
     * last chance to serve the request) or when the max memory limit
     * has been reached.
     */
    {
        void *result;
        if (use_calloc)
            result = PyMem_RawCalloc(nelem, elsize);
        else
            result = PyMem_RawMalloc(nbytes);
        if (!result)
            _Py_AllocatedBlocks--;
        return result;
    }
}

static void *
_PyObject_Malloc(void *ctx, size_t nbytes)
{
    return _PyObject_Alloc(0, ctx, 1, nbytes);
}

static void *
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
{
    return _PyObject_Alloc(1, ctx, nelem, elsize);
}

/* free */

static void
_PyObject_Free(void *ctx, void *p)
{
    poolp pool;
    block *lastfree;
    poolp next, prev;
    uint size;

    if (p == NULL)      /* free(NULL) has no effect */
        return;

    _Py_AllocatedBlocks--;

#ifdef WITH_VALGRIND
    if (UNLIKELY(running_on_valgrind > 0))
        goto redirect;
#endif

    pool = POOL_ADDR(p);
    if (address_in_range(p, pool)) {
        /* We allocated this address. */
        LOCK();
        /* Link p to the start of the pool's freeblock list. Since
         * the pool had at least the p block outstanding, the pool
         * wasn't empty (so it's already in a usedpools[] list, or
         * was full and is in no list -- it's not in the freeblocks
         * list in any case).
         */
        assert(pool->ref.count > 0);            /* else it was empty */
        *(block **)p = lastfree = pool->freeblock;
        pool->freeblock = (block *)p;
        if (lastfree) {
            struct arena_object* ao;
            uint nf;  /* ao->nfreepools */

            /* freeblock wasn't NULL, so the pool wasn't full,
             * and the pool is in a usedpools[] list.
             */
            if (--pool->ref.count != 0) {
                /* pool isn't empty: leave it in usedpools */
                UNLOCK();
                return;
            }
            /* Pool is now empty: unlink from usedpools, and
             * link to the front of freepools. This ensures that
             * previously freed pools will be allocated later
             * (being not referenced, they are perhaps paged out).
             */
            next = pool->nextpool;
            prev = pool->prevpool;
            next->prevpool = prev;
            prev->nextpool = next;

            /* Link the pool to freepools. This is a singly-linked
             * list, and pool->prevpool isn't used there.
             */
            ao = &arenas[pool->arenaindex];
            pool->nextpool = ao->freepools;
            ao->freepools = pool;
            nf = ++ao->nfreepools;

            /* All the rest is arena management. We just freed
             * a pool, and there are 4 cases for arena mgmt:
             * 1. If all the pools are free, return the arena to
             *    the system free().
             * 2. If this is the only free pool in the arena,
             *    add the arena back to the `usable_arenas` list.
             * 3. If the "next" arena has a smaller count of free
             *    pools, we have to "slide this arena right" to
             *    restore that usable_arenas is sorted in order of
             *    nfreepools.
             * 4. Else there's nothing more to do.
             */
            if (nf == ao->ntotalpools) {
                /* Case 1. First unlink ao from usable_arenas.
                 */
                assert(ao->prevarena == NULL ||
                       ao->prevarena->address != 0);
                assert(ao->nextarena == NULL ||
                       ao->nextarena->address != 0);

                /* Fix the pointer in the prevarena, or the
                 * usable_arenas pointer.
                 */
                if (ao->prevarena == NULL) {
                    usable_arenas = ao->nextarena;
                    assert(usable_arenas == NULL ||
                           usable_arenas->address != 0);
                }
                else {
                    assert(ao->prevarena->nextarena == ao);
                    ao->prevarena->nextarena =
                        ao->nextarena;
                }
                /* Fix the pointer in the nextarena. */
                if (ao->nextarena != NULL) {
                    assert(ao->nextarena->prevarena == ao);
                    ao->nextarena->prevarena =
                        ao->prevarena;
                }
                /* Record that this arena_object slot is
                 * available to be reused.
                 */
                ao->nextarena = unused_arena_objects;
                unused_arena_objects = ao;

                /* Free the entire arena. */
                _PyObject_Arena.free(_PyObject_Arena.ctx,
                                     (void *)ao->address, ARENA_SIZE);
                ao->address = 0;                        /* mark unassociated */
                --narenas_currently_allocated;

                UNLOCK();
                return;
            }
            if (nf == 1) {
                /* Case 2. Put ao at the head of
                 * usable_arenas. Note that because
                 * ao->nfreepools was 0 before, ao isn't
                 * currently on the usable_arenas list.
                 */
                ao->nextarena = usable_arenas;
                ao->prevarena = NULL;
                if (usable_arenas)
                    usable_arenas->prevarena = ao;
                usable_arenas = ao;
                assert(usable_arenas->address != 0);

                UNLOCK();
                return;
            }
            /* If this arena is now out of order, we need to keep
             * the list sorted. The list is kept sorted so that
             * the "most full" arenas are used first, which allows
             * the nearly empty arenas to be completely freed. In
             * a few un-scientific tests, it seems like this
             * approach allowed a lot more memory to be freed.
             */
            if (ao->nextarena == NULL ||
                nf <= ao->nextarena->nfreepools) {
                /* Case 4. Nothing to do. */
                UNLOCK();
                return;
            }
            /* Case 3: We have to move the arena towards the end
             * of the list, because it has more free pools than
             * the arena to its right.
             * First unlink ao from usable_arenas.
             */
            if (ao->prevarena != NULL) {
                /* ao isn't at the head of the list */
                assert(ao->prevarena->nextarena == ao);
                ao->prevarena->nextarena = ao->nextarena;
            }
            else {
                /* ao is at the head of the list */
                assert(usable_arenas == ao);
                usable_arenas = ao->nextarena;
            }
            ao->nextarena->prevarena = ao->prevarena;

            /* Locate the new insertion point by iterating over
             * the list, using our nextarena pointer.
             */
            while (ao->nextarena != NULL &&
                   nf > ao->nextarena->nfreepools) {
                ao->prevarena = ao->nextarena;
                ao->nextarena = ao->nextarena->nextarena;
            }

            /* Insert ao at this point. */
            assert(ao->nextarena == NULL ||
                   ao->prevarena == ao->nextarena->prevarena);
            assert(ao->prevarena->nextarena == ao->nextarena);

            ao->prevarena->nextarena = ao;
            if (ao->nextarena != NULL)
                ao->nextarena->prevarena = ao;

            /* Verify that the swaps worked. */
            assert(ao->nextarena == NULL ||
                   nf <= ao->nextarena->nfreepools);
            assert(ao->prevarena == NULL ||
                   nf > ao->prevarena->nfreepools);
            assert(ao->nextarena == NULL ||
                   ao->nextarena->prevarena == ao);
            assert((usable_arenas == ao &&
                    ao->prevarena == NULL) ||
                   ao->prevarena->nextarena == ao);

            UNLOCK();
            return;
        }
        /* Pool was full, so doesn't currently live in any list:
         * link it to the front of the appropriate usedpools[] list.
         * This mimics LRU pool usage for new allocations and
         * targets optimal filling when several pools contain
         * blocks of the same size class.
         */
        --pool->ref.count;
        assert(pool->ref.count > 0);            /* else the pool is empty */
        size = pool->szidx;
        next = usedpools[size + size];
        prev = next->prevpool;
        /* insert pool before next:   prev <-> pool <-> next */
        pool->nextpool = next;
        pool->prevpool = prev;
        next->prevpool = pool;
        prev->nextpool = pool;
        UNLOCK();
        return;
    }

#ifdef WITH_VALGRIND
redirect:
#endif
    /* We didn't allocate this address. */
    PyMem_RawFree(p);
}

/* realloc. If p is NULL, this acts like malloc(nbytes). Else if nbytes==0,
 * then as the Python docs promise, we do not treat this like free(p), and
 * return a non-NULL result.
1671 */ 1672 1673static void * 1674_PyObject_Realloc(void *ctx, void *p, size_t nbytes) 1675{ 1676 void *bp; 1677 poolp pool; 1678 size_t size; 1679 1680 if (p == NULL) 1681 return _PyObject_Alloc(0, ctx, 1, nbytes); 1682 1683#ifdef WITH_VALGRIND 1684 /* Treat running_on_valgrind == -1 the same as 0 */ 1685 if (UNLIKELY(running_on_valgrind > 0)) 1686 goto redirect; 1687#endif 1688 1689 pool = POOL_ADDR(p); 1690 if (address_in_range(p, pool)) { 1691 /* We're in charge of this block */ 1692 size = INDEX2SIZE(pool->szidx); 1693 if (nbytes <= size) { 1694 /* The block is staying the same or shrinking. If 1695 * it's shrinking, there's a tradeoff: it costs 1696 * cycles to copy the block to a smaller size class, 1697 * but it wastes memory not to copy it. The 1698 * compromise here is to copy on shrink only if at 1699 * least 25% of size can be shaved off. 1700 */ 1701 if (4 * nbytes > 3 * size) { 1702 /* It's the same, 1703 * or shrinking and new/old > 3/4. 1704 */ 1705 return p; 1706 } 1707 size = nbytes; 1708 } 1709 bp = _PyObject_Alloc(0, ctx, 1, nbytes); 1710 if (bp != NULL) { 1711 memcpy(bp, p, size); 1712 _PyObject_Free(ctx, p); 1713 } 1714 return bp; 1715 } 1716#ifdef WITH_VALGRIND 1717 redirect: 1718#endif 1719 /* We're not managing this block. If nbytes <= 1720 * SMALL_REQUEST_THRESHOLD, it's tempting to try to take over this 1721 * block. However, if we do, we need to copy the valid data from 1722 * the C-managed block to one of our blocks, and there's no portable 1723 * way to know how much of the memory space starting at p is valid. 1724 * As bug 1185883 pointed out the hard way, it's possible that the 1725 * C-managed block is "at the end" of allocated VM space, so that 1726 * a memory fault can occur if we try to copy nbytes bytes starting 1727 * at p. Instead we punt: let C continue to manage this block. 1728 */ 1729 if (nbytes) 1730 return PyMem_RawRealloc(p, nbytes); 1731 /* C doesn't define the result of realloc(p, 0) (it may or may not 1732 * return NULL then), but Python's docs promise that nbytes==0 never 1733 * returns NULL. We don't pass 0 to realloc(), to avoid that endcase 1734 * to begin with. Even then, we can't be sure that realloc() won't 1735 * return NULL. 1736 */ 1737 bp = PyMem_RawRealloc(p, 1); 1738 return bp ? bp : p; 1739} 1740 1741#else /* ! WITH_PYMALLOC */ 1742 1743/*==========================================================================*/ 1744/* pymalloc not enabled: Redirect the entry points to malloc. These will 1745 * only be used by extensions that are compiled with pymalloc enabled. */ 1746 1747Py_ssize_t 1748_Py_GetAllocatedBlocks(void) 1749{ 1750 return 0; 1751} 1752 1753#endif /* WITH_PYMALLOC */ 1754 1755 1756/*==========================================================================*/ 1757/* A x-platform debugging allocator. This doesn't manage memory directly, 1758 * it wraps a real allocator, adding extra debugging info to the memory blocks. 1759 */ 1760 1761/* Special bytes broadcast into debug memory blocks at appropriate times. 1762 * Strings of these are unlikely to be valid addresses, floats, ints or 1763 * 7-bit ASCII. 
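 *
 * For example, a pointer picked up from CLEANBYTE-filled (allocated but
 * never initialized) memory reads as 0xCBCBCBCBCBCBCBCB on a 64-bit
 * build: a pattern that stands out immediately in a debugger and is very
 * unlikely to be a mapped address, so a stray dereference tends to fault
 * at once rather than silently appearing to work.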
1764 */ 1765#undef CLEANBYTE 1766#undef DEADBYTE 1767#undef FORBIDDENBYTE 1768#define CLEANBYTE 0xCB /* clean (newly allocated) memory */ 1769#define DEADBYTE 0xDB /* dead (newly freed) memory */ 1770#define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */ 1771 1772static size_t serialno = 0; /* incremented on each debug {m,re}alloc */ 1773 1774/* serialno is always incremented via calling this routine. The point is 1775 * to supply a single place to set a breakpoint. 1776 */ 1777static void 1778bumpserialno(void) 1779{ 1780 ++serialno; 1781} 1782 1783#define SST SIZEOF_SIZE_T 1784 1785/* Read sizeof(size_t) bytes at p as a big-endian size_t. */ 1786static size_t 1787read_size_t(const void *p) 1788{ 1789 const uint8_t *q = (const uint8_t *)p; 1790 size_t result = *q++; 1791 int i; 1792 1793 for (i = SST; --i > 0; ++q) 1794 result = (result << 8) | *q; 1795 return result; 1796} 1797 1798/* Write n as a big-endian size_t, MSB at address p, LSB at 1799 * p + sizeof(size_t) - 1. 1800 */ 1801static void 1802write_size_t(void *p, size_t n) 1803{ 1804 uint8_t *q = (uint8_t *)p + SST - 1; 1805 int i; 1806 1807 for (i = SST; --i >= 0; --q) { 1808 *q = (uint8_t)(n & 0xff); 1809 n >>= 8; 1810 } 1811} 1812 1813/* Let S = sizeof(size_t). The debug malloc asks for 4*S extra bytes and 1814 fills them with useful stuff, here calling the underlying malloc's result p: 1815 1816p[0: S] 1817 Number of bytes originally asked for. This is a size_t, big-endian (easier 1818 to read in a memory dump). 1819p[S] 1820 API ID. See PEP 445. This is a character, but seems undocumented. 1821p[S+1: 2*S] 1822 Copies of FORBIDDENBYTE. Used to catch under- writes and reads. 1823p[2*S: 2*S+n] 1824 The requested memory, filled with copies of CLEANBYTE. 1825 Used to catch reference to uninitialized memory. 1826 &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc 1827 handled the request itself. 1828p[2*S+n: 2*S+n+S] 1829 Copies of FORBIDDENBYTE. Used to catch over- writes and reads. 1830p[2*S+n+S: 2*S+n+2*S] 1831 A serial number, incremented by 1 on each call to _PyMem_DebugMalloc 1832 and _PyMem_DebugRealloc. 1833 This is a big-endian size_t. 1834 If "bad memory" is detected later, the serial number gives an 1835 excellent way to set a breakpoint on the next run, to capture the 1836 instant at which this block was passed out. 
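
A concrete example, assuming a 64-bit build (S == 8) and a malloc-style
request for n == 10 bytes: the debug allocator asks the underlying
allocator for 10 + 4*8 == 42 bytes and lays them out as

p[0:8]    00 00 00 00 00 00 00 0A    the requested size (10), big-endian
p[8]      the API id ('r', 'm' or 'o')
p[9:16]   7 copies of FORBIDDENBYTE (0xFB)
p[16:26]  the caller's 10 bytes, filled with CLEANBYTE (0xCB) on the
          malloc path (the calloc path leaves them zeroed); &p[16] is
          the pointer handed back to the caller
p[26:34]  8 copies of FORBIDDENBYTE (0xFB)
p[34:42]  the serial number, big-endian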
1837*/ 1838 1839static void * 1840_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes) 1841{ 1842 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; 1843 uint8_t *p; /* base address of malloc'ed block */ 1844 uint8_t *tail; /* p + 2*SST + nbytes == pointer to tail pad bytes */ 1845 size_t total; /* nbytes + 4*SST */ 1846 1847 bumpserialno(); 1848 total = nbytes + 4*SST; 1849 if (nbytes > PY_SSIZE_T_MAX - 4*SST) 1850 /* overflow: can't represent total as a Py_ssize_t */ 1851 return NULL; 1852 1853 if (use_calloc) 1854 p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total); 1855 else 1856 p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total); 1857 if (p == NULL) 1858 return NULL; 1859 1860 /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */ 1861 write_size_t(p, nbytes); 1862 p[SST] = (uint8_t)api->api_id; 1863 memset(p + SST + 1, FORBIDDENBYTE, SST-1); 1864 1865 if (nbytes > 0 && !use_calloc) 1866 memset(p + 2*SST, CLEANBYTE, nbytes); 1867 1868 /* at tail, write pad (SST bytes) and serialno (SST bytes) */ 1869 tail = p + 2*SST + nbytes; 1870 memset(tail, FORBIDDENBYTE, SST); 1871 write_size_t(tail + SST, serialno); 1872 1873 return p + 2*SST; 1874} 1875 1876static void * 1877_PyMem_DebugRawMalloc(void *ctx, size_t nbytes) 1878{ 1879 return _PyMem_DebugRawAlloc(0, ctx, nbytes); 1880} 1881 1882static void * 1883_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize) 1884{ 1885 size_t nbytes; 1886 assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize); 1887 nbytes = nelem * elsize; 1888 return _PyMem_DebugRawAlloc(1, ctx, nbytes); 1889} 1890 1891/* The debug free first checks the 2*SST bytes on each end for sanity (in 1892 particular, that the FORBIDDENBYTEs with the api ID are still intact). 1893 Then fills the original bytes with DEADBYTE. 1894 Then calls the underlying free. 1895*/ 1896static void 1897_PyMem_DebugRawFree(void *ctx, void *p) 1898{ 1899 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; 1900 uint8_t *q = (uint8_t *)p - 2*SST; /* address returned from malloc */ 1901 size_t nbytes; 1902 1903 if (p == NULL) 1904 return; 1905 _PyMem_DebugCheckAddress(api->api_id, p); 1906 nbytes = read_size_t(q); 1907 nbytes += 4*SST; 1908 if (nbytes > 0) 1909 memset(q, DEADBYTE, nbytes); 1910 api->alloc.free(api->alloc.ctx, q); 1911} 1912 1913static void * 1914_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes) 1915{ 1916 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx; 1917 uint8_t *q = (uint8_t *)p, *oldq; 1918 uint8_t *tail; 1919 size_t total; /* nbytes + 4*SST */ 1920 size_t original_nbytes; 1921 int i; 1922 1923 if (p == NULL) 1924 return _PyMem_DebugRawAlloc(0, ctx, nbytes); 1925 1926 _PyMem_DebugCheckAddress(api->api_id, p); 1927 bumpserialno(); 1928 original_nbytes = read_size_t(q - 2*SST); 1929 total = nbytes + 4*SST; 1930 if (nbytes > PY_SSIZE_T_MAX - 4*SST) 1931 /* overflow: can't represent total as a Py_ssize_t */ 1932 return NULL; 1933 1934 /* Resize and add decorations. We may get a new pointer here, in which 1935 * case we didn't get the chance to mark the old memory with DEADBYTE, 1936 * but we live with that. 
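 *
 * Note that the underlying realloc below is handed q - 2*SST, i.e. the
 * true base of the block (size field, API id and leading pad included),
 * and is asked for total == nbytes + 4*SST bytes, so the size field,
 * trailing pad and serial number can all be rewritten afterwards.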
1937 */ 1938 oldq = q; 1939 q = (uint8_t *)api->alloc.realloc(api->alloc.ctx, q - 2*SST, total); 1940 if (q == NULL) 1941 return NULL; 1942 1943 if (q == oldq && nbytes < original_nbytes) { 1944 /* shrinking: mark old extra memory dead */ 1945 memset(q + nbytes, DEADBYTE, original_nbytes - nbytes); 1946 } 1947 1948 write_size_t(q, nbytes); 1949 assert(q[SST] == (uint8_t)api->api_id); 1950 for (i = 1; i < SST; ++i) 1951 assert(q[SST + i] == FORBIDDENBYTE); 1952 q += 2*SST; 1953 1954 tail = q + nbytes; 1955 memset(tail, FORBIDDENBYTE, SST); 1956 write_size_t(tail + SST, serialno); 1957 1958 if (nbytes > original_nbytes) { 1959 /* growing: mark new extra memory clean */ 1960 memset(q + original_nbytes, CLEANBYTE, 1961 nbytes - original_nbytes); 1962 } 1963 1964 return q; 1965} 1966 1967static void 1968_PyMem_DebugCheckGIL(void) 1969{ 1970#ifdef WITH_THREAD 1971 if (!PyGILState_Check()) 1972 Py_FatalError("Python memory allocator called " 1973 "without holding the GIL"); 1974#endif 1975} 1976 1977static void * 1978_PyMem_DebugMalloc(void *ctx, size_t nbytes) 1979{ 1980 _PyMem_DebugCheckGIL(); 1981 return _PyMem_DebugRawMalloc(ctx, nbytes); 1982} 1983 1984static void * 1985_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize) 1986{ 1987 _PyMem_DebugCheckGIL(); 1988 return _PyMem_DebugRawCalloc(ctx, nelem, elsize); 1989} 1990 1991static void 1992_PyMem_DebugFree(void *ctx, void *ptr) 1993{ 1994 _PyMem_DebugCheckGIL(); 1995 _PyMem_DebugRawFree(ctx, ptr); 1996} 1997 1998static void * 1999_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes) 2000{ 2001 _PyMem_DebugCheckGIL(); 2002 return _PyMem_DebugRawRealloc(ctx, ptr, nbytes); 2003} 2004 2005/* Check the forbidden bytes on both ends of the memory allocated for p. 2006 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress, 2007 * and call Py_FatalError to kill the program. 2008 * The API id, is also checked. 2009 */ 2010static void 2011_PyMem_DebugCheckAddress(char api, const void *p) 2012{ 2013 const uint8_t *q = (const uint8_t *)p; 2014 char msgbuf[64]; 2015 char *msg; 2016 size_t nbytes; 2017 const uint8_t *tail; 2018 int i; 2019 char id; 2020 2021 if (p == NULL) { 2022 msg = "didn't expect a NULL pointer"; 2023 goto error; 2024 } 2025 2026 /* Check the API id */ 2027 id = (char)q[-SST]; 2028 if (id != api) { 2029 msg = msgbuf; 2030 snprintf(msg, sizeof(msgbuf), "bad ID: Allocated using API '%c', verified using API '%c'", id, api); 2031 msgbuf[sizeof(msgbuf)-1] = 0; 2032 goto error; 2033 } 2034 2035 /* Check the stuff at the start of p first: if there's underwrite 2036 * corruption, the number-of-bytes field may be nuts, and checking 2037 * the tail could lead to a segfault then. 2038 */ 2039 for (i = SST-1; i >= 1; --i) { 2040 if (*(q-i) != FORBIDDENBYTE) { 2041 msg = "bad leading pad byte"; 2042 goto error; 2043 } 2044 } 2045 2046 nbytes = read_size_t(q - 2*SST); 2047 tail = q + nbytes; 2048 for (i = 0; i < SST; ++i) { 2049 if (tail[i] != FORBIDDENBYTE) { 2050 msg = "bad trailing pad byte"; 2051 goto error; 2052 } 2053 } 2054 2055 return; 2056 2057error: 2058 _PyObject_DebugDumpAddress(p); 2059 Py_FatalError(msg); 2060} 2061 2062/* Display info to stderr about the memory block at p. 
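 *
 * For a healthy block the output looks roughly like this (illustrative
 * values from a 64-bit build; the address, API id, serial number and
 * data bytes will of course vary):
 *
 *     Debug memory block at address p=0x7f3a94d2e030: API 'o'
 *         24 bytes originally requested
 *         The 7 pad bytes at p-7 are FORBIDDENBYTE, as expected.
 *         The 8 pad bytes at tail=0x7f3a94d2e048 are FORBIDDENBYTE, as expected.
 *         The block was made by call #1234 to debug malloc/realloc.
 *         Data at p: cb cb cb cb cb cb cb cb ... cb cb cb cb cb cb cb cb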
*/ 2063static void 2064_PyObject_DebugDumpAddress(const void *p) 2065{ 2066 const uint8_t *q = (const uint8_t *)p; 2067 const uint8_t *tail; 2068 size_t nbytes, serial; 2069 int i; 2070 int ok; 2071 char id; 2072 2073 fprintf(stderr, "Debug memory block at address p=%p:", p); 2074 if (p == NULL) { 2075 fprintf(stderr, "\n"); 2076 return; 2077 } 2078 id = (char)q[-SST]; 2079 fprintf(stderr, " API '%c'\n", id); 2080 2081 nbytes = read_size_t(q - 2*SST); 2082 fprintf(stderr, " %" PY_FORMAT_SIZE_T "u bytes originally " 2083 "requested\n", nbytes); 2084 2085 /* In case this is nuts, check the leading pad bytes first. */ 2086 fprintf(stderr, " The %d pad bytes at p-%d are ", SST-1, SST-1); 2087 ok = 1; 2088 for (i = 1; i <= SST-1; ++i) { 2089 if (*(q-i) != FORBIDDENBYTE) { 2090 ok = 0; 2091 break; 2092 } 2093 } 2094 if (ok) 2095 fputs("FORBIDDENBYTE, as expected.\n", stderr); 2096 else { 2097 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n", 2098 FORBIDDENBYTE); 2099 for (i = SST-1; i >= 1; --i) { 2100 const uint8_t byte = *(q-i); 2101 fprintf(stderr, " at p-%d: 0x%02x", i, byte); 2102 if (byte != FORBIDDENBYTE) 2103 fputs(" *** OUCH", stderr); 2104 fputc('\n', stderr); 2105 } 2106 2107 fputs(" Because memory is corrupted at the start, the " 2108 "count of bytes requested\n" 2109 " may be bogus, and checking the trailing pad " 2110 "bytes may segfault.\n", stderr); 2111 } 2112 2113 tail = q + nbytes; 2114 fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, tail); 2115 ok = 1; 2116 for (i = 0; i < SST; ++i) { 2117 if (tail[i] != FORBIDDENBYTE) { 2118 ok = 0; 2119 break; 2120 } 2121 } 2122 if (ok) 2123 fputs("FORBIDDENBYTE, as expected.\n", stderr); 2124 else { 2125 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n", 2126 FORBIDDENBYTE); 2127 for (i = 0; i < SST; ++i) { 2128 const uint8_t byte = tail[i]; 2129 fprintf(stderr, " at tail+%d: 0x%02x", 2130 i, byte); 2131 if (byte != FORBIDDENBYTE) 2132 fputs(" *** OUCH", stderr); 2133 fputc('\n', stderr); 2134 } 2135 } 2136 2137 serial = read_size_t(tail + SST); 2138 fprintf(stderr, " The block was made by call #%" PY_FORMAT_SIZE_T 2139 "u to debug malloc/realloc.\n", serial); 2140 2141 if (nbytes > 0) { 2142 i = 0; 2143 fputs(" Data at p:", stderr); 2144 /* print up to 8 bytes at the start */ 2145 while (q < tail && i < 8) { 2146 fprintf(stderr, " %02x", *q); 2147 ++i; 2148 ++q; 2149 } 2150 /* and up to 8 at the end */ 2151 if (q < tail) { 2152 if (tail - q > 8) { 2153 fputs(" ...", stderr); 2154 q = tail - 8; 2155 } 2156 while (q < tail) { 2157 fprintf(stderr, " %02x", *q); 2158 ++q; 2159 } 2160 } 2161 fputc('\n', stderr); 2162 } 2163 fputc('\n', stderr); 2164 2165 fflush(stderr); 2166 _PyMem_DumpTraceback(fileno(stderr), p); 2167} 2168 2169 2170static size_t 2171printone(FILE *out, const char* msg, size_t value) 2172{ 2173 int i, k; 2174 char buf[100]; 2175 size_t origvalue = value; 2176 2177 fputs(msg, out); 2178 for (i = (int)strlen(msg); i < 35; ++i) 2179 fputc(' ', out); 2180 fputc('=', out); 2181 2182 /* Write the value with commas. 
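 * For example, a value of 1234567 is emitted right-justified in a
 * 21-character field as "            1,234,567", followed by a newline,
 * so the figures line up in columns across successive calls.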
*/ 2183 i = 22; 2184 buf[i--] = '\0'; 2185 buf[i--] = '\n'; 2186 k = 3; 2187 do { 2188 size_t nextvalue = value / 10; 2189 unsigned int digit = (unsigned int)(value - nextvalue * 10); 2190 value = nextvalue; 2191 buf[i--] = (char)(digit + '0'); 2192 --k; 2193 if (k == 0 && value && i >= 0) { 2194 k = 3; 2195 buf[i--] = ','; 2196 } 2197 } while (value && i >= 0); 2198 2199 while (i >= 0) 2200 buf[i--] = ' '; 2201 fputs(buf, out); 2202 2203 return origvalue; 2204} 2205 2206void 2207_PyDebugAllocatorStats(FILE *out, 2208 const char *block_name, int num_blocks, size_t sizeof_block) 2209{ 2210 char buf1[128]; 2211 char buf2[128]; 2212 PyOS_snprintf(buf1, sizeof(buf1), 2213 "%d %ss * %" PY_FORMAT_SIZE_T "d bytes each", 2214 num_blocks, block_name, sizeof_block); 2215 PyOS_snprintf(buf2, sizeof(buf2), 2216 "%48s ", buf1); 2217 (void)printone(out, buf2, num_blocks * sizeof_block); 2218} 2219 2220 2221#ifdef WITH_PYMALLOC 2222 2223#ifdef Py_DEBUG 2224/* Is target in the list? The list is traversed via the nextpool pointers. 2225 * The list may be NULL-terminated, or circular. Return 1 if target is in 2226 * list, else 0. 2227 */ 2228static int 2229pool_is_in_list(const poolp target, poolp list) 2230{ 2231 poolp origlist = list; 2232 assert(target != NULL); 2233 if (list == NULL) 2234 return 0; 2235 do { 2236 if (target == list) 2237 return 1; 2238 list = list->nextpool; 2239 } while (list != NULL && list != origlist); 2240 return 0; 2241} 2242#endif 2243 2244/* Print summary info to "out" about the state of pymalloc's structures. 2245 * In Py_DEBUG mode, also perform some expensive internal consistency 2246 * checks. 2247 */ 2248void 2249_PyObject_DebugMallocStats(FILE *out) 2250{ 2251 uint i; 2252 const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT; 2253 /* # of pools, allocated blocks, and free blocks per class index */ 2254 size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; 2255 size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; 2256 size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; 2257 /* total # of allocated bytes in used and full pools */ 2258 size_t allocated_bytes = 0; 2259 /* total # of available bytes in used pools */ 2260 size_t available_bytes = 0; 2261 /* # of free pools + pools not yet carved out of current arena */ 2262 uint numfreepools = 0; 2263 /* # of bytes for arena alignment padding */ 2264 size_t arena_alignment = 0; 2265 /* # of bytes in used and full pools used for pool_headers */ 2266 size_t pool_header_bytes = 0; 2267 /* # of bytes in used and full pools wasted due to quantization, 2268 * i.e. the necessarily leftover space at the ends of used and 2269 * full pools. 2270 */ 2271 size_t quantization = 0; 2272 /* # of arenas actually allocated. */ 2273 size_t narenas = 0; 2274 /* running total -- should equal narenas * ARENA_SIZE */ 2275 size_t total; 2276 char buf[128]; 2277 2278 fprintf(out, "Small block threshold = %d, in %u size classes.\n", 2279 SMALL_REQUEST_THRESHOLD, numclasses); 2280 2281 for (i = 0; i < numclasses; ++i) 2282 numpools[i] = numblocks[i] = numfreeblocks[i] = 0; 2283 2284 /* Because full pools aren't linked to from anything, it's easiest 2285 * to march over all the arenas. If we're lucky, most of the memory 2286 * will be living in full pools -- would be a shame to miss them. 2287 */ 2288 for (i = 0; i < maxarenas; ++i) { 2289 uint j; 2290 uintptr_t base = arenas[i].address; 2291 2292 /* Skip arenas which are not allocated. 
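 * (An arena_object whose address is 0 is an unused slot: either it never
 * had an arena associated with it, or its arena was freed and the slot
 * was pushed back onto unused_arena_objects, as in the arena-freeing
 * path of _PyObject_Free above.)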
*/ 2293 if (arenas[i].address == (uintptr_t)NULL) 2294 continue; 2295 narenas += 1; 2296 2297 numfreepools += arenas[i].nfreepools; 2298 2299 /* round up to pool alignment */ 2300 if (base & (uintptr_t)POOL_SIZE_MASK) { 2301 arena_alignment += POOL_SIZE; 2302 base &= ~(uintptr_t)POOL_SIZE_MASK; 2303 base += POOL_SIZE; 2304 } 2305 2306 /* visit every pool in the arena */ 2307 assert(base <= (uintptr_t) arenas[i].pool_address); 2308 for (j = 0; base < (uintptr_t) arenas[i].pool_address; 2309 ++j, base += POOL_SIZE) { 2310 poolp p = (poolp)base; 2311 const uint sz = p->szidx; 2312 uint freeblocks; 2313 2314 if (p->ref.count == 0) { 2315 /* currently unused */ 2316#ifdef Py_DEBUG 2317 assert(pool_is_in_list(p, arenas[i].freepools)); 2318#endif 2319 continue; 2320 } 2321 ++numpools[sz]; 2322 numblocks[sz] += p->ref.count; 2323 freeblocks = NUMBLOCKS(sz) - p->ref.count; 2324 numfreeblocks[sz] += freeblocks; 2325#ifdef Py_DEBUG 2326 if (freeblocks > 0) 2327 assert(pool_is_in_list(p, usedpools[sz + sz])); 2328#endif 2329 } 2330 } 2331 assert(narenas == narenas_currently_allocated); 2332 2333 fputc('\n', out); 2334 fputs("class size num pools blocks in use avail blocks\n" 2335 "----- ---- --------- ------------- ------------\n", 2336 out); 2337 2338 for (i = 0; i < numclasses; ++i) { 2339 size_t p = numpools[i]; 2340 size_t b = numblocks[i]; 2341 size_t f = numfreeblocks[i]; 2342 uint size = INDEX2SIZE(i); 2343 if (p == 0) { 2344 assert(b == 0 && f == 0); 2345 continue; 2346 } 2347 fprintf(out, "%5u %6u " 2348 "%11" PY_FORMAT_SIZE_T "u " 2349 "%15" PY_FORMAT_SIZE_T "u " 2350 "%13" PY_FORMAT_SIZE_T "u\n", 2351 i, size, p, b, f); 2352 allocated_bytes += b * size; 2353 available_bytes += f * size; 2354 pool_header_bytes += p * POOL_OVERHEAD; 2355 quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size); 2356 } 2357 fputc('\n', out); 2358 if (_PyMem_DebugEnabled()) 2359 (void)printone(out, "# times object malloc called", serialno); 2360 (void)printone(out, "# arenas allocated total", ntimes_arena_allocated); 2361 (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas); 2362 (void)printone(out, "# arenas highwater mark", narenas_highwater); 2363 (void)printone(out, "# arenas allocated current", narenas); 2364 2365 PyOS_snprintf(buf, sizeof(buf), 2366 "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena", 2367 narenas, ARENA_SIZE); 2368 (void)printone(out, buf, narenas * ARENA_SIZE); 2369 2370 fputc('\n', out); 2371 2372 total = printone(out, "# bytes in allocated blocks", allocated_bytes); 2373 total += printone(out, "# bytes in available blocks", available_bytes); 2374 2375 PyOS_snprintf(buf, sizeof(buf), 2376 "%u unused pools * %d bytes", numfreepools, POOL_SIZE); 2377 total += printone(out, buf, (size_t)numfreepools * POOL_SIZE); 2378 2379 total += printone(out, "# bytes lost to pool headers", pool_header_bytes); 2380 total += printone(out, "# bytes lost to quantization", quantization); 2381 total += printone(out, "# bytes lost to arena alignment", arena_alignment); 2382 (void)printone(out, "Total", total); 2383} 2384 2385#endif /* #ifdef WITH_PYMALLOC */ 2386