gslice.c revision f2613bf9ed1dcf0bd2e8b131d20b50d146a6316d
/* GLIB sliced memory - fast concurrent memory chunk allocator
 * Copyright (C) 2005 Tim Janik
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
/* MT safe */
#define _XOPEN_SOURCE 600       /* posix_memalign() */
#include <stdlib.h>             /* posix_memalign() */
#include <string.h>
#include <errno.h>
#include "config.h"
#include "gmem.h"               /* gslice.h */
#include "gthreadinit.h"
#include "galias.h"
#include "glib.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>             /* sysconf() */
#endif
#ifdef G_OS_WIN32
#include <windows.h>
#include <process.h>
#endif

/* the GSlice allocator is split up into 4 layers, roughly modelled after the slab
 * allocator and magazine extensions as outlined in:
 * + [Bonwick94] Jeff Bonwick, The slab allocator: An object-caching kernel
 *   memory allocator. USENIX 1994, http://citeseer.ist.psu.edu/bonwick94slab.html
 * + [Bonwick01] Bonwick and Jonathan Adams, Magazines and vmem: Extending the
 *   slab allocator to many CPUs and arbitrary resources.
 *   USENIX 2001, http://citeseer.ist.psu.edu/bonwick01magazines.html
 * the layers are:
 * - the thread magazines. for each (aligned) chunk size, a magazine (a list)
 *   of recently freed and soon to be allocated chunks is maintained per thread.
 *   this way, most alloc/free requests can be quickly satisfied from per-thread
 *   free lists which only require one g_private_get() call to retrieve the
 *   thread handle.
 * - the magazine cache. allocating and freeing chunks to/from threads only
 *   occurs at magazine sizes from a global depot of magazines. the depot
 *   maintains a 15 second working set of allocated magazines, so full
 *   magazines are not allocated and released too often.
 *   the chunk size dependent magazine sizes automatically adapt (within limits,
 *   see [3]) to lock contention to properly scale performance across a variety
 *   of SMP systems.
 * - the slab allocator. this allocator allocates slabs (blocks of memory) close
 *   to the system page size or multiples thereof which have to be page aligned.
 *   the blocks are divided into smaller chunks which are used to satisfy
 *   allocations from the upper layers. the space provided by the remainder of
 *   the chunk size division is used for cache colorization (random distribution
 *   of chunk addresses) to improve processor cache utilization. multiple slabs
 *   with the same chunk size are kept in a partially sorted ring to allow O(1)
 *   freeing and allocation of chunks (as long as the allocation of an entirely
 *   new slab can be avoided).
 * - the page allocator. on most modern systems, posix_memalign(3) or
 *   memalign(3) should be available, so this is used to allocate blocks with
 *   system page size based alignments and sizes or multiples thereof.
 *   if no memalign variant is provided, valloc() is used instead and
 *   block sizes are limited to the system page size (no multiples thereof).
 *   as a fallback, on systems without even valloc(), a malloc(3)-based page
 *   allocator with alloc-only behaviour is used.
 *
 * NOTES:
 * [1] some systems' memalign(3) implementations may rely on boundary tagging for
 *     the handed out memory chunks. to avoid excessive page-wise fragmentation,
 *     we reserve 2 * sizeof (void*) per block size for the system's memalign(3),
 *     specified in NATIVE_MALLOC_PADDING.
 * [2] using the slab allocator alone already provides for a fast and efficient
 *     allocator, it doesn't properly scale beyond single-threaded uses though.
 *     also, the slab allocator implements eager free(3)-ing, i.e. does not
 *     provide any form of caching or working set maintenance. so if used alone,
 *     it's vulnerable to thrashing for sequences of balanced (alloc, free) pairs
 *     at certain thresholds.
 * [3] magazine sizes are bound by an implementation specific minimum size and
 *     a chunk size specific maximum to limit magazine storage sizes to roughly
 *     16KB.
 * [4] allocating ca. 8 chunks per block/page keeps a good balance between
 *     external and internal fragmentation (<= 12.5%). [Bonwick94]
 */
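
/* a minimal usage sketch of the public entry points implemented below;
 * purely illustrative and kept out of compilation via #if 0 (the Point
 * type is a made-up example, not part of GLib):
 */
#if 0
typedef struct { double x, y; } Point;

static Point*
point_new (void)
{
  /* size classes are picked by allocator_categorize(); small sizes
   * like this one are served from the per-thread magazines */
  return g_slice_alloc0 (sizeof (Point));
}

static void
point_free (Point *point)
{
  /* the size passed to g_slice_free1() must match the allocation size */
  g_slice_free1 (sizeof (Point), point);
}
#endif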

/* --- macros and constants --- */
#define LARGEALIGNMENT          (256)
#define P2ALIGNMENT             (2 * sizeof (gsize))                            /* fits 2 pointers (assumed to be 2 * GLIB_SIZEOF_SIZE_T below) */
#define ALIGN(size, base)       ((base) * (gsize) (((size) + (base) - 1) / (base)))
#define NATIVE_MALLOC_PADDING   P2ALIGNMENT                                     /* per-page padding left for native malloc(3) see [1] */
#define SLAB_INFO_SIZE          P2ALIGN (sizeof (SlabInfo) + NATIVE_MALLOC_PADDING)
#define MAX_MAGAZINE_SIZE       (256)                                           /* see [3] and allocator_get_magazine_threshold() for this */
#define MIN_MAGAZINE_SIZE       (4)
#define MAX_STAMP_COUNTER       (7)                                             /* distributes the load of gettimeofday() */
#define MAX_SLAB_CHUNK_SIZE(al) (((al)->max_page_size - SLAB_INFO_SIZE) / 8)    /* we want at least 8 chunks per page, see [4] */
#define MAX_SLAB_INDEX(al)      (SLAB_INDEX (al, MAX_SLAB_CHUNK_SIZE (al)) + 1)
#define SLAB_INDEX(al, asize)   ((asize) / P2ALIGNMENT - 1)                     /* asize must be P2ALIGNMENT aligned */
#define SLAB_CHUNK_SIZE(al, ix) (((ix) + 1) * P2ALIGNMENT)
#define SLAB_BPAGE_SIZE(al,csz) (8 * (csz) + SLAB_INFO_SIZE)

/* optimized version of ALIGN (size, P2ALIGNMENT) */
#if     GLIB_SIZEOF_SIZE_T * 2 == 8  /* P2ALIGNMENT */
#define P2ALIGN(size)   (((size) + 0x7) & ~(gsize) 0x7)
#elif   GLIB_SIZEOF_SIZE_T * 2 == 16 /* P2ALIGNMENT */
#define P2ALIGN(size)   (((size) + 0xf) & ~(gsize) 0xf)
#else
#define P2ALIGN(size)   ALIGN (size, P2ALIGNMENT)
#endif
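
/* worked example, assuming GLIB_SIZEOF_SIZE_T == 8, i.e. P2ALIGNMENT == 16:
 *   P2ALIGN (30) == (30 + 0xf) & ~(gsize) 0xf == 45 & ~15 == 32
 * so a 30 byte request is rounded up to the 32 byte chunk class.
 */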

/* special helpers to avoid gmessage.c dependency */
static void mem_error (const char *format, ...) G_GNUC_PRINTF (1,2);
#define mem_assert(cond)    do { if (G_LIKELY (cond)) ; else mem_error ("assertion failed: %s", #cond); } while (0)

/* --- structures --- */
typedef struct _ChunkLink      ChunkLink;
typedef struct _SlabInfo       SlabInfo;
typedef struct _CachedMagazine CachedMagazine;
struct _ChunkLink {
  ChunkLink *next;
  ChunkLink *data;
};
struct _SlabInfo {
  ChunkLink *chunks;
  guint n_allocated;
  SlabInfo *next, *prev;
};
typedef struct {
  ChunkLink *chunks;
  gsize      count;                     /* approximate length of the chunks list */
} Magazine;
typedef struct {
  Magazine   *magazine1;                /* array of MAX_SLAB_INDEX (allocator) */
  Magazine   *magazine2;                /* array of MAX_SLAB_INDEX (allocator) */
} ThreadMemory;
typedef struct {
  gboolean always_malloc;
  gboolean bypass_magazines;
  gsize    working_set_msecs;
  guint    color_increment;
} SliceConfig;
typedef struct {
  /* const after initialization */
  gsize         min_page_size, max_page_size;
  SliceConfig   config;
  gsize         max_slab_chunk_size_for_magazine_cache;
  /* magazine cache */
  GMutex       *magazine_mutex;
  ChunkLink   **magazines;                /* array of MAX_SLAB_INDEX (allocator) */
  guint        *contention_counters;      /* array of MAX_SLAB_INDEX (allocator) */
  gint          mutex_counter;
  guint         stamp_counter;
  guint         last_stamp;
  /* slab allocator */
  GMutex       *slab_mutex;
  SlabInfo    **slab_stack;                /* array of MAX_SLAB_INDEX (allocator) */
  guint        color_accu;
} Allocator;

/* --- prototypes --- */
static gpointer     slab_allocator_alloc_chunk       (gsize      chunk_size);
static void         slab_allocator_free_chunk        (gsize      chunk_size,
                                                      gpointer   mem);
static void         private_thread_memory_cleanup    (gpointer   data);
static gpointer     allocator_memalign               (gsize      alignment,
                                                      gsize      memsize);
static void         allocator_memfree                (gsize      memsize,
                                                      gpointer   mem);
static inline void  magazine_cache_update_stamp      (void);
static inline gsize allocator_get_magazine_threshold (Allocator *allocator,
                                                      guint      ix);

/* --- variables --- */
static GPrivate        *private_thread_memory = NULL;
static gsize            sys_page_size = 0;
static Allocator        allocator[1] = { { 0, }, };
static SliceConfig      slice_config = {
  FALSE,        /* always_malloc */
  FALSE,        /* bypass_magazines */
  15 * 1000,    /* working_set_msecs */
  1,            /* color increment, alt: 0x7fffffff */
};

/* --- auxiliary functions --- */
void
g_slice_set_config (GSliceConfig ckey,
                    gint64       value)
{
  g_return_if_fail (sys_page_size == 0);
  switch (ckey)
    {
    case G_SLICE_CONFIG_ALWAYS_MALLOC:
      slice_config.always_malloc = value != 0;
      break;
    case G_SLICE_CONFIG_BYPASS_MAGAZINES:
      slice_config.bypass_magazines = value != 0;
      break;
    case G_SLICE_CONFIG_WORKING_SET_MSECS:
      slice_config.working_set_msecs = value;
      break;
    case G_SLICE_CONFIG_COLOR_INCREMENT:
      slice_config.color_increment = value;
      break;
    default: ;
    }
}

gint64
g_slice_get_config (GSliceConfig ckey)
{
  switch (ckey)
    {
    case G_SLICE_CONFIG_ALWAYS_MALLOC:
      return slice_config.always_malloc;
    case G_SLICE_CONFIG_BYPASS_MAGAZINES:
      return slice_config.bypass_magazines;
    case G_SLICE_CONFIG_WORKING_SET_MSECS:
      return slice_config.working_set_msecs;
    case G_SLICE_CONFIG_CHUNK_SIZES:
      return MAX_SLAB_INDEX (allocator);
    case G_SLICE_CONFIG_COLOR_INCREMENT:
      return slice_config.color_increment;
    default:
      return 0;
    }
}

gint64*
g_slice_get_config_state (GSliceConfig ckey,
                          gint64       address,
                          guint       *n_values)
{
  guint i = 0;
  g_return_val_if_fail (n_values != NULL, NULL);
  *n_values = 0;
  switch (ckey)
    {
      gint64 array[64];
    case G_SLICE_CONFIG_CONTENTION_COUNTER:
      array[i++] = SLAB_CHUNK_SIZE (allocator, address);
      array[i++] = allocator->contention_counters[address];
      array[i++] = allocator_get_magazine_threshold (allocator, address);
      *n_values = i;
      return g_memdup (array, sizeof (array[0]) * *n_values);
    default:
      return NULL;
    }
}

static void
g_slice_init_nomessage (void)
{
  /* we may not use g_error() or friends here */
  mem_assert (sys_page_size == 0);
  mem_assert (MIN_MAGAZINE_SIZE >= 4);

#ifdef G_OS_WIN32
  {
    SYSTEM_INFO system_info;
    GetSystemInfo (&system_info);
    sys_page_size = system_info.dwPageSize;
  }
#else
  sys_page_size = sysconf (_SC_PAGESIZE); /* = sysconf (_SC_PAGE_SIZE); = getpagesize(); */
#endif
  mem_assert (sys_page_size >= 2 * LARGEALIGNMENT);
  mem_assert ((sys_page_size & (sys_page_size - 1)) == 0);
  allocator->config = slice_config;
  allocator->min_page_size = sys_page_size;
#if HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN
  /* allow allocation of pages up to 8KB (with 8KB alignment).
   * this is useful because many medium to large sized structures
   * fit less than 8 times (see [4]) into 4KB pages.
   * we allow very small page sizes here, to reduce wastage in
   * threads if only small allocations are required (this does
   * bear the risk of increasing allocation times and fragmentation
   * though).
   */
  allocator->min_page_size = MAX (allocator->min_page_size, 4096);
  allocator->max_page_size = MAX (allocator->min_page_size, 8192);
  allocator->min_page_size = MIN (allocator->min_page_size, 128);
#else
  /* we can only align to system page size */
  allocator->max_page_size = sys_page_size;
#endif
  allocator->magazine_mutex = NULL;     /* _g_slice_thread_init_nomessage() */
  allocator->magazines = g_new0 (ChunkLink*, MAX_SLAB_INDEX (allocator));
  allocator->contention_counters = g_new0 (guint, MAX_SLAB_INDEX (allocator));
  allocator->mutex_counter = 0;
  allocator->stamp_counter = MAX_STAMP_COUNTER; /* force initial update */
  allocator->last_stamp = 0;
  allocator->slab_mutex = NULL;         /* _g_slice_thread_init_nomessage() */
  allocator->slab_stack = g_new0 (SlabInfo*, MAX_SLAB_INDEX (allocator));
  allocator->color_accu = 0;
  magazine_cache_update_stamp();
  /* values cached for performance reasons */
  allocator->max_slab_chunk_size_for_magazine_cache = MAX_SLAB_CHUNK_SIZE (allocator);
  if (allocator->config.always_malloc || allocator->config.bypass_magazines)
    allocator->max_slab_chunk_size_for_magazine_cache = 0;      /* non-optimized cases */
}

static inline guint
allocator_categorize (gsize aligned_chunk_size)
{
  /* speed up the likely path */
  if (G_LIKELY (aligned_chunk_size && aligned_chunk_size <= allocator->max_slab_chunk_size_for_magazine_cache))
    return 1;           /* use magazine cache */

  /* the above will fail (max_slab_chunk_size_for_magazine_cache == 0) if the
   * allocator is still uninitialized, or if we are not configured to use the
   * magazine cache.
   */
  if (!sys_page_size)
    g_slice_init_nomessage ();
  if (!allocator->config.always_malloc &&
      aligned_chunk_size &&
      aligned_chunk_size <= MAX_SLAB_CHUNK_SIZE (allocator))
    {
      if (allocator->config.bypass_magazines)
        return 2;       /* use slab allocator, see [2] */
      return 1;         /* use magazine cache */
    }
  return 0;             /* use malloc() */
}
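
/* rough orientation for the cutoff (derived from the macros above, assuming
 * max_page_size == 8192 and a 64-bit SLAB_INFO_SIZE of 48 bytes):
 *   MAX_SLAB_CHUNK_SIZE (allocator) == (8192 - 48) / 8 == 1018
 * so requests up to roughly 1KB are served by magazines/slabs, and anything
 * larger is delegated to g_malloc().
 */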

void
_g_slice_thread_init_nomessage (void)
{
  /* we may not use g_error() or friends here */
  if (!sys_page_size)
    g_slice_init_nomessage();
  private_thread_memory = g_private_new (private_thread_memory_cleanup);
  allocator->magazine_mutex = g_mutex_new();
  allocator->slab_mutex = g_mutex_new();
}

static inline void
g_mutex_lock_a (GMutex *mutex,
                guint  *contention_counter)
{
  gboolean contention = FALSE;
  if (!g_mutex_trylock (mutex))
    {
      g_mutex_lock (mutex);
      contention = TRUE;
    }
  if (contention)
    {
      allocator->mutex_counter++;
      if (allocator->mutex_counter >= 1)        /* quickly adapt to contention */
        {
          allocator->mutex_counter = 0;
          *contention_counter = MIN (*contention_counter + 1, MAX_MAGAZINE_SIZE);
        }
    }
  else /* !contention */
    {
      allocator->mutex_counter--;
      if (allocator->mutex_counter < -11)       /* moderately recover magazine sizes */
        {
          allocator->mutex_counter = 0;
          *contention_counter = MAX (*contention_counter, 1) - 1;
        }
    }
}

static inline ThreadMemory*
thread_memory_from_self (void)
{
  ThreadMemory *tmem = g_private_get (private_thread_memory);
  if (G_UNLIKELY (!tmem))
    {
      const guint n_magazines = MAX_SLAB_INDEX (allocator);
      tmem = g_malloc0 (sizeof (ThreadMemory) + sizeof (Magazine) * 2 * n_magazines);
      tmem->magazine1 = (Magazine*) (tmem + 1);
      tmem->magazine2 = &tmem->magazine1[n_magazines];
      g_private_set (private_thread_memory, tmem);
    }
  return tmem;
}

static inline ChunkLink*
magazine_chain_pop_head (ChunkLink **magazine_chunks)
{
  /* magazine chains are linked via ChunkLink->next.
   * each ChunkLink->data of the toplevel chain may point to a subchain,
   * linked via ChunkLink->next. ChunkLink->data of the subchains just
   * contains uninitialized junk.
   */
  ChunkLink *chunk = (*magazine_chunks)->data;
  if (G_UNLIKELY (chunk))
    {
      /* allocating from freed list */
      (*magazine_chunks)->data = chunk->next;
    }
  else
    {
      chunk = *magazine_chunks;
      *magazine_chunks = chunk->next;
    }
  return chunk;
}
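
/* shape of a magazine chain, as described above (next links run to the
 * right, data links run down; subchain data fields hold junk):
 *
 *   toplevel:   [c]--next-->[c]--next-->[c]--next--> NULL
 *                |data                   |data
 *                v                       v
 *   subchains:  [c]--next-->[c]-->NULL  [c]-->NULL
 */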

#if 0 /* useful for debugging */
static guint
magazine_count (ChunkLink *head)
{
  guint count = 0;
  if (!head)
    return 0;
  while (head)
    {
      ChunkLink *child;
      count += 1;
      for (child = head->data; child; child = child->next)
        count += 1;
      head = head->next;
    }
  return count;
}
#endif

static inline gsize
allocator_get_magazine_threshold (Allocator *allocator,
                                  guint      ix)
{
  /* the magazine size calculated here has a lower bound of MIN_MAGAZINE_SIZE,
   * which is required by the implementation. also, for moderately sized chunks
   * (say >= 64 bytes), magazine sizes shouldn't be much smaller than half the
   * number of chunks available per page to avoid excessive traffic in the
   * magazine cache for small to medium sized structures.
   * the upper bound of the magazine size is effectively provided by
   * MAX_MAGAZINE_SIZE. for larger chunks, this number is scaled down so that
   * the content of a single magazine doesn't exceed ca. 16KB.
   */
  gsize chunk_size = SLAB_CHUNK_SIZE (allocator, ix);
  guint threshold = MAX (MIN_MAGAZINE_SIZE, allocator->max_page_size / MAX (5 * chunk_size, 5 * 32));
  guint contention_counter = allocator->contention_counters[ix];
  if (G_UNLIKELY (contention_counter))  /* single CPU bias */
    {
      /* adapt contention counter thresholds to chunk sizes */
      contention_counter = contention_counter * 64 / chunk_size;
      threshold = MAX (threshold, contention_counter);
    }
  return threshold;
}
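
/* worked example for the uncontended case, assuming max_page_size == 8192:
 * for chunk_size == 64, threshold == MAX (4, 8192 / MAX (320, 160)) == 25,
 * i.e. magazines of ~25 chunks (~1.6KB) travel between threads and depot.
 */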

/* --- magazine cache --- */
static inline void
magazine_cache_update_stamp (void)
{
  if (allocator->stamp_counter >= MAX_STAMP_COUNTER)
    {
      GTimeVal tv;
      g_get_current_time (&tv);
      allocator->last_stamp = tv.tv_sec * 1000 + tv.tv_usec / 1000; /* milliseconds */
      allocator->stamp_counter = 0;
    }
  else
    allocator->stamp_counter++;
}

static inline ChunkLink*
magazine_chain_prepare_fields (ChunkLink *magazine_chunks)
{
  ChunkLink *chunk1;
  ChunkLink *chunk2;
  ChunkLink *chunk3;
  ChunkLink *chunk4;
  /* checked upon initialization: mem_assert (MIN_MAGAZINE_SIZE >= 4); */
  /* ensure a magazine with at least 4 unused data pointers */
  chunk1 = magazine_chain_pop_head (&magazine_chunks);
  chunk2 = magazine_chain_pop_head (&magazine_chunks);
  chunk3 = magazine_chain_pop_head (&magazine_chunks);
  chunk4 = magazine_chain_pop_head (&magazine_chunks);
  chunk4->next = magazine_chunks;
  chunk3->next = chunk4;
  chunk2->next = chunk3;
  chunk1->next = chunk2;
  return chunk1;
}

/* access the first 4 fields of a specially prepared magazine chain */
#define magazine_chain_prev(mc)         ((mc)->data)
#define magazine_chain_stamp(mc)        ((mc)->next->data)
#define magazine_chain_uint_stamp(mc)   GPOINTER_TO_UINT ((mc)->next->data)
#define magazine_chain_next(mc)         ((mc)->next->next->data)
#define magazine_chain_count(mc)        ((mc)->next->next->next->data)
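
/* resulting layout of a cached magazine (see magazine_chain_prepare_fields()):
 *   mc                   ->data   prev magazine in the cache ring
 *   mc->next             ->data   depot timestamp (via GUINT_TO_POINTER)
 *   mc->next->next       ->data   next magazine in the cache ring
 *   mc->next->next->next ->data   chunk count of this magazine
 * all remaining links and subchains are ordinary free chunks.
 */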

static void
magazine_cache_trim (Allocator *allocator,
                     guint      ix,
                     guint      stamp)
{
  /* g_mutex_lock (allocator->mutex); done by caller */
  /* trim magazine cache from tail */
  ChunkLink *current = magazine_chain_prev (allocator->magazines[ix]);
  ChunkLink *trash = NULL;
  while (ABS (stamp - magazine_chain_uint_stamp (current)) >= allocator->config.working_set_msecs)
    {
      /* unlink */
      ChunkLink *prev = magazine_chain_prev (current);
      ChunkLink *next = magazine_chain_next (current);
      magazine_chain_next (prev) = next;
      magazine_chain_prev (next) = prev;
      /* clear special fields, put on trash stack */
      magazine_chain_next (current) = NULL;
      magazine_chain_count (current) = NULL;
      magazine_chain_stamp (current) = NULL;
      magazine_chain_prev (current) = trash;
      trash = current;
      /* fixup list head if required */
      if (current == allocator->magazines[ix])
        {
          allocator->magazines[ix] = NULL;
          break;
        }
      current = prev;
    }
  g_mutex_unlock (allocator->magazine_mutex);
  /* free trash */
  if (trash)
    {
      const gsize chunk_size = SLAB_CHUNK_SIZE (allocator, ix);
      g_mutex_lock (allocator->slab_mutex);
      while (trash)
        {
          current = trash;
          trash = magazine_chain_prev (current);
          magazine_chain_prev (current) = NULL; /* clear special field */
          while (current)
            {
              ChunkLink *chunk = magazine_chain_pop_head (&current);
              slab_allocator_free_chunk (chunk_size, chunk);
            }
        }
      g_mutex_unlock (allocator->slab_mutex);
    }
}

static void
magazine_cache_push_magazine (guint      ix,
                              ChunkLink *magazine_chunks,
                              gsize      count) /* must be >= MIN_MAGAZINE_SIZE */
{
  ChunkLink *current = magazine_chain_prepare_fields (magazine_chunks);
  ChunkLink *next, *prev;
  g_mutex_lock (allocator->magazine_mutex);
  /* add magazine at head */
  next = allocator->magazines[ix];
  if (next)
    prev = magazine_chain_prev (next);
  else
    next = prev = current;
  magazine_chain_next (prev) = current;
  magazine_chain_prev (next) = current;
  magazine_chain_prev (current) = prev;
  magazine_chain_next (current) = next;
  magazine_chain_count (current) = (gpointer) count;
  /* stamp magazine */
  magazine_cache_update_stamp();
  magazine_chain_stamp (current) = GUINT_TO_POINTER (allocator->last_stamp);
  allocator->magazines[ix] = current;
  /* free old magazines beyond a certain threshold */
  magazine_cache_trim (allocator, ix, allocator->last_stamp);
  /* g_mutex_unlock (allocator->mutex); was done by magazine_cache_trim() */
}

static ChunkLink*
magazine_cache_pop_magazine (guint  ix,
                             gsize *countp)
{
  g_mutex_lock_a (allocator->magazine_mutex, &allocator->contention_counters[ix]);
  if (!allocator->magazines[ix])
    {
      guint magazine_threshold = allocator_get_magazine_threshold (allocator, ix);
      gsize i, chunk_size = SLAB_CHUNK_SIZE (allocator, ix);
      ChunkLink *chunk, *head;
      g_mutex_unlock (allocator->magazine_mutex);
      g_mutex_lock (allocator->slab_mutex);
      head = slab_allocator_alloc_chunk (chunk_size);
      head->data = NULL;
      chunk = head;
      for (i = 1; i < magazine_threshold; i++)
        {
          chunk->next = slab_allocator_alloc_chunk (chunk_size);
          chunk = chunk->next;
          chunk->data = NULL;
        }
      chunk->next = NULL;
      g_mutex_unlock (allocator->slab_mutex);
      *countp = i;
      return head;
    }
  else
    {
      ChunkLink *current = allocator->magazines[ix];
      ChunkLink *prev = magazine_chain_prev (current);
      ChunkLink *next = magazine_chain_next (current);
      /* unlink */
      magazine_chain_next (prev) = next;
      magazine_chain_prev (next) = prev;
      allocator->magazines[ix] = next == current ? NULL : next;
      g_mutex_unlock (allocator->magazine_mutex);
      /* clear special fields and hand out */
      *countp = (gsize) magazine_chain_count (current);
      magazine_chain_prev (current) = NULL;
      magazine_chain_next (current) = NULL;
      magazine_chain_count (current) = NULL;
      magazine_chain_stamp (current) = NULL;
      return current;
    }
}

/* --- thread magazines --- */
static void
private_thread_memory_cleanup (gpointer data)
{
  ThreadMemory *tmem = data;
  const guint n_magazines = MAX_SLAB_INDEX (allocator);
  guint ix;
  for (ix = 0; ix < n_magazines; ix++)
    {
      Magazine *mags[2];
      guint j;
      mags[0] = &tmem->magazine1[ix];
      mags[1] = &tmem->magazine2[ix];
      for (j = 0; j < 2; j++)
        {
          Magazine *mag = mags[j];
          if (mag->count >= MIN_MAGAZINE_SIZE)
            magazine_cache_push_magazine (ix, mag->chunks, mag->count);
          else
            {
              const gsize chunk_size = SLAB_CHUNK_SIZE (allocator, ix);
              g_mutex_lock (allocator->slab_mutex);
              while (mag->chunks)
                {
                  ChunkLink *chunk = magazine_chain_pop_head (&mag->chunks);
                  slab_allocator_free_chunk (chunk_size, chunk);
                }
              g_mutex_unlock (allocator->slab_mutex);
            }
        }
    }
  g_free (tmem);
}

static void
thread_memory_magazine1_reload (ThreadMemory *tmem,
                                guint         ix)
{
  Magazine *mag = &tmem->magazine1[ix];
  mem_assert (mag->chunks == NULL); /* ensure that we may reset mag->count */
  mag->count = 0;
  mag->chunks = magazine_cache_pop_magazine (ix, &mag->count);
}

static void
thread_memory_magazine2_unload (ThreadMemory *tmem,
                                guint         ix)
{
  Magazine *mag = &tmem->magazine2[ix];
  magazine_cache_push_magazine (ix, mag->chunks, mag->count);
  mag->chunks = NULL;
  mag->count = 0;
}

static inline void
thread_memory_swap_magazines (ThreadMemory *tmem,
                              guint         ix)
{
  Magazine xmag = tmem->magazine1[ix];
  tmem->magazine1[ix] = tmem->magazine2[ix];
  tmem->magazine2[ix] = xmag;
}

static inline gboolean
thread_memory_magazine1_is_empty (ThreadMemory *tmem,
                                  guint         ix)
{
  return tmem->magazine1[ix].chunks == NULL;
}

static inline gboolean
thread_memory_magazine2_is_full (ThreadMemory *tmem,
                                 guint         ix)
{
  return tmem->magazine2[ix].count >= allocator_get_magazine_threshold (allocator, ix);
}

static inline gpointer
thread_memory_magazine1_alloc (ThreadMemory *tmem,
                               guint         ix)
{
  Magazine *mag = &tmem->magazine1[ix];
  ChunkLink *chunk = magazine_chain_pop_head (&mag->chunks);
  if (G_LIKELY (mag->count > 0))
    mag->count--;
  return chunk;
}

static inline void
thread_memory_magazine2_free (ThreadMemory *tmem,
                              guint         ix,
                              gpointer      mem)
{
  Magazine *mag = &tmem->magazine2[ix];
  ChunkLink *chunk = mem;
  chunk->data = NULL;
  chunk->next = mag->chunks;
  mag->chunks = chunk;
  mag->count++;
}

/* --- API functions --- */
gpointer
g_slice_alloc (gsize mem_size)
{
  gsize chunk_size;
  gpointer mem;
  guint acat;
  chunk_size = P2ALIGN (mem_size);
  acat = allocator_categorize (chunk_size);
  if (G_LIKELY (acat == 1))     /* allocate through magazine layer */
    {
      ThreadMemory *tmem = thread_memory_from_self();
      guint ix = SLAB_INDEX (allocator, chunk_size);
      if (G_UNLIKELY (thread_memory_magazine1_is_empty (tmem, ix)))
        {
          thread_memory_swap_magazines (tmem, ix);
          if (G_UNLIKELY (thread_memory_magazine1_is_empty (tmem, ix)))
            thread_memory_magazine1_reload (tmem, ix);
        }
      mem = thread_memory_magazine1_alloc (tmem, ix);
    }
  else if (acat == 2)           /* allocate through slab allocator */
    {
      g_mutex_lock (allocator->slab_mutex);
      mem = slab_allocator_alloc_chunk (chunk_size);
      g_mutex_unlock (allocator->slab_mutex);
    }
  else                          /* delegate to system malloc */
    mem = g_malloc (mem_size);
  return mem;
}

gpointer
g_slice_alloc0 (gsize mem_size)
{
  gpointer mem = g_slice_alloc (mem_size);
  if (mem)
    memset (mem, 0, mem_size);
  return mem;
}

void
g_slice_free1 (gsize    mem_size,
               gpointer mem_block)
{
  gsize chunk_size = P2ALIGN (mem_size);
  guint acat = allocator_categorize (chunk_size);
  if (G_UNLIKELY (!mem_block))
    /* pass */;
  else if (G_LIKELY (acat == 1))        /* free through magazine layer */
    {
      ThreadMemory *tmem = thread_memory_from_self();
      guint ix = SLAB_INDEX (allocator, chunk_size);
      if (G_UNLIKELY (thread_memory_magazine2_is_full (tmem, ix)))
        {
          thread_memory_swap_magazines (tmem, ix);
          if (G_UNLIKELY (thread_memory_magazine2_is_full (tmem, ix)))
            thread_memory_magazine2_unload (tmem, ix);
        }
      thread_memory_magazine2_free (tmem, ix, mem_block);
    }
  else if (acat == 2)                   /* free through slab allocator */
    {
      g_mutex_lock (allocator->slab_mutex);
      slab_allocator_free_chunk (chunk_size, mem_block);
      g_mutex_unlock (allocator->slab_mutex);
    }
  else                                  /* delegate to system free */
    g_free (mem_block);
}

void
g_slice_free_chain_with_offset (gsize    mem_size,
                                gpointer mem_chain,
                                gsize    next_offset)
{
  gpointer slice = mem_chain;
  /* while the thread magazines and the magazine cache are implemented so that
   * they can easily be extended to allow for free lists containing more free
   * lists for the first level nodes, which would allow O(1) freeing in this
   * function, the benefit of such an extension is questionable, because:
   * - the magazine size counts will become mere lower bounds which confuses
   *   the code adapting to lock contention;
   * - freeing a single node to the thread magazines is very fast, so this
   *   O(list_length) operation is multiplied by a fairly small factor;
   * - memory usage histograms on larger applications seem to indicate that
   *   the amount of released multi node lists is negligible in comparison
   *   to single node releases;
   * - the major performance bottleneck, namely g_private_get() or
   *   g_mutex_lock()/g_mutex_unlock(), has already been moved out of the
   *   inner loop for freeing chained slices.
   */
  gsize chunk_size = P2ALIGN (mem_size);
  guint acat = allocator_categorize (chunk_size);
  if (G_LIKELY (acat == 1))             /* free through magazine layer */
    {
      ThreadMemory *tmem = thread_memory_from_self();
      guint ix = SLAB_INDEX (allocator, chunk_size);
      while (slice)
        {
          guint8 *current = slice;
          slice = *(gpointer*) (current + next_offset);
          if (G_UNLIKELY (thread_memory_magazine2_is_full (tmem, ix)))
            {
              thread_memory_swap_magazines (tmem, ix);
              if (G_UNLIKELY (thread_memory_magazine2_is_full (tmem, ix)))
                thread_memory_magazine2_unload (tmem, ix);
            }
          thread_memory_magazine2_free (tmem, ix, current);
        }
    }
  else if (acat == 2)                   /* free through slab allocator */
    {
      g_mutex_lock (allocator->slab_mutex);
      while (slice)
        {
          guint8 *current = slice;
          slice = *(gpointer*) (current + next_offset);
          slab_allocator_free_chunk (chunk_size, current);
        }
      g_mutex_unlock (allocator->slab_mutex);
    }
  else                                  /* delegate to system free */
    while (slice)
      {
        guint8 *current = slice;
        slice = *(gpointer*) (current + next_offset);
        g_free (current);
      }
}
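
/* illustrative sketch (kept out of compilation; Node is a made-up type):
 * next_offset is normally derived with G_STRUCT_OFFSET, which is how the
 * g_slice_free_chain() convenience macro expands to this function.
 */
#if 0
typedef struct _Node Node;
struct _Node {
  Node    *next;
  gpointer payload;
};

static void
node_chain_free (Node *head)
{
  /* frees every node reachable via ->next in one call */
  g_slice_free_chain_with_offset (sizeof (Node), head,
                                  G_STRUCT_OFFSET (Node, next));
}
#endif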

/* --- single page allocator --- */
static void
allocator_slab_stack_push (Allocator *allocator,
                           guint      ix,
                           SlabInfo  *sinfo)
{
  /* insert slab at slab ring head */
  if (!allocator->slab_stack[ix])
    {
      sinfo->next = sinfo;
      sinfo->prev = sinfo;
    }
  else
    {
      SlabInfo *next = allocator->slab_stack[ix], *prev = next->prev;
      next->prev = sinfo;
      prev->next = sinfo;
      sinfo->next = next;
      sinfo->prev = prev;
    }
  allocator->slab_stack[ix] = sinfo;
}

static gsize
allocator_aligned_page_size (Allocator *allocator,
                             gsize      n_bytes)
{
  gsize val = 1 << g_bit_storage (n_bytes - 1);
  val = MAX (val, allocator->min_page_size);
  return val;
}
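
/* worked example: n_bytes == 5000 yields g_bit_storage (4999) == 13, so
 * val == 1 << 13 == 8192, i.e. sizes are rounded up to the next power of
 * two before min_page_size is enforced.
 */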

static void
allocator_add_slab (Allocator *allocator,
                    guint      ix,
                    gsize      chunk_size)
{
  ChunkLink *chunk;
  SlabInfo *sinfo;
  gsize addr, padding, n_chunks, color = 0;
  gsize page_size = allocator_aligned_page_size (allocator, SLAB_BPAGE_SIZE (allocator, chunk_size));
  /* allocate 1 page for the chunks and the slab */
  gpointer aligned_memory = allocator_memalign (page_size, page_size - NATIVE_MALLOC_PADDING);
  guint8 *mem = aligned_memory;
  guint i;
  if (!mem)
    {
      const gchar *syserr = "unknown error";
#if HAVE_STRERROR
      syserr = strerror (errno);
#endif
      mem_error ("failed to allocate %u bytes (alignment: %u): %s\n",
                 (guint) (page_size - NATIVE_MALLOC_PADDING), (guint) page_size, syserr);
    }
  /* mask page address */
  addr = ((gsize) mem / page_size) * page_size;
  /* assert alignment */
  mem_assert (aligned_memory == (gpointer) addr);
  /* basic slab info setup */
  sinfo = (SlabInfo*) (mem + page_size - SLAB_INFO_SIZE);
  sinfo->n_allocated = 0;
  sinfo->chunks = NULL;
  /* figure cache colorization */
  n_chunks = ((guint8*) sinfo - mem) / chunk_size;
  padding = ((guint8*) sinfo - mem) - n_chunks * chunk_size;
  if (padding)
    {
      color = (allocator->color_accu * P2ALIGNMENT) % padding;
      allocator->color_accu += allocator->config.color_increment;
    }
  /* add chunks to free list */
  chunk = (ChunkLink*) (mem + color);
  sinfo->chunks = chunk;
  for (i = 0; i < n_chunks - 1; i++)
    {
      chunk->next = (ChunkLink*) ((guint8*) chunk + chunk_size);
      chunk = chunk->next;
    }
  chunk->next = NULL;   /* last chunk */
  /* add slab to slab ring */
  allocator_slab_stack_push (allocator, ix, sinfo);
}
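
/* resulting page layout (addresses grow to the right):
 *
 *   | color | chunk 0 | chunk 1 | ... | chunk n-1 | leftover | SlabInfo |
 *   ^ page start                                              ^ page end
 *
 * the SlabInfo always sits at the very end of the page, which is what lets
 * slab_allocator_free_chunk() recover it by masking a chunk address down to
 * the page boundary.
 */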

static gpointer
slab_allocator_alloc_chunk (gsize chunk_size)
{
  ChunkLink *chunk;
  guint ix = SLAB_INDEX (allocator, chunk_size);
  /* ensure non-empty slab */
  if (!allocator->slab_stack[ix] || !allocator->slab_stack[ix]->chunks)
    allocator_add_slab (allocator, ix, chunk_size);
  /* allocate chunk */
  chunk = allocator->slab_stack[ix]->chunks;
  allocator->slab_stack[ix]->chunks = chunk->next;
  allocator->slab_stack[ix]->n_allocated++;
  /* rotate empty slabs */
  if (!allocator->slab_stack[ix]->chunks)
    allocator->slab_stack[ix] = allocator->slab_stack[ix]->next;
  return chunk;
}

static void
slab_allocator_free_chunk (gsize    chunk_size,
                           gpointer mem)
{
  ChunkLink *chunk;
  gboolean was_empty;
  guint ix = SLAB_INDEX (allocator, chunk_size);
  gsize page_size = allocator_aligned_page_size (allocator, SLAB_BPAGE_SIZE (allocator, chunk_size));
  gsize addr = ((gsize) mem / page_size) * page_size;
  /* mask page address */
  guint8 *page = (guint8*) addr;
  SlabInfo *sinfo = (SlabInfo*) (page + page_size - SLAB_INFO_SIZE);
  /* assert valid chunk count */
  mem_assert (sinfo->n_allocated > 0);
  /* add chunk to free list */
  was_empty = sinfo->chunks == NULL;
  chunk = (ChunkLink*) mem;
  chunk->next = sinfo->chunks;
  sinfo->chunks = chunk;
  sinfo->n_allocated--;
  /* keep slab ring partially sorted, empty slabs at end */
  if (was_empty)
    {
      /* unlink slab */
      SlabInfo *next = sinfo->next, *prev = sinfo->prev;
      next->prev = prev;
      prev->next = next;
      if (allocator->slab_stack[ix] == sinfo)
        allocator->slab_stack[ix] = next == sinfo ? NULL : next;
      /* insert slab at head */
      allocator_slab_stack_push (allocator, ix, sinfo);
    }
  /* eagerly free complete unused slabs */
  if (!sinfo->n_allocated)
    {
      /* unlink slab */
      SlabInfo *next = sinfo->next, *prev = sinfo->prev;
      next->prev = prev;
      prev->next = next;
      if (allocator->slab_stack[ix] == sinfo)
        allocator->slab_stack[ix] = next == sinfo ? NULL : next;
      /* free slab */
      allocator_memfree (page_size, page);
    }
}

/* --- memalign implementation --- */
#ifdef HAVE_MALLOC_H
#include <malloc.h>             /* memalign() */
#endif

/* from config.h:
 * define HAVE_POSIX_MEMALIGN     1     // if free(posix_memalign(3)) works, <stdlib.h>
 * define HAVE_MEMALIGN           1     // if free(memalign(3)) works, <malloc.h>
 * define HAVE_VALLOC             1     // if free(valloc(3)) works, <stdlib.h> or <malloc.h>
 * if none is provided, we implement malloc(3)-based alloc-only page alignment
 */

#if !(HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_VALLOC)
static GTrashStack *compat_valloc_trash = NULL;
#endif

static gpointer
allocator_memalign (gsize alignment,
                    gsize memsize)
{
  gpointer aligned_memory = NULL;
  gint err = ENOMEM;
#if     HAVE_POSIX_MEMALIGN
  err = posix_memalign (&aligned_memory, alignment, memsize);
#elif   HAVE_MEMALIGN
  errno = 0;
  aligned_memory = memalign (alignment, memsize);
  err = errno;
#elif   HAVE_VALLOC
  errno = 0;
  aligned_memory = valloc (memsize);
  err = errno;
#else
  /* simplistic non-freeing page allocator */
  mem_assert (alignment == sys_page_size);
  mem_assert (memsize <= sys_page_size);
  if (!compat_valloc_trash)
    {
      const guint n_pages = 16;
      guint8 *mem = malloc (n_pages * sys_page_size);
      err = errno;
      if (mem)
        {
          gint i = n_pages;
          guint8 *amem = (guint8*) ALIGN ((gsize) mem, sys_page_size);
          if (amem != mem)
            i--;        /* mem wasn't page aligned */
          while (--i >= 0)
            g_trash_stack_push (&compat_valloc_trash, amem + i * sys_page_size);
        }
    }
  aligned_memory = g_trash_stack_pop (&compat_valloc_trash);
#endif
  if (!aligned_memory)
    errno = err;
  return aligned_memory;
}

static void
allocator_memfree (gsize    memsize,
                   gpointer mem)
{
#if     HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_VALLOC
  free (mem);
#else
  mem_assert (memsize <= sys_page_size);
  g_trash_stack_push (&compat_valloc_trash, mem);
#endif
}

#include <stdio.h>
#include <stdarg.h>             /* va_list, for mem_error() */

static void
mem_error (const char *format,
           ...)
{
  const char *pname;
  va_list args;
  /* at least, put out "MEMORY-ERROR", in case we segfault during the rest of the function */
  fputs ("\n***MEMORY-ERROR***: ", stderr);
  pname = g_get_prgname();
  fprintf (stderr, "%s[%u]: GSlice: ", pname ? pname : "", getpid());
  va_start (args, format);
  vfprintf (stderr, format, args);
  va_end (args);
  fputs ("\n", stderr);
  _exit (1);
}

#define __G_SLICE_C__
#include "galiasdef.c"