1/*
2 *
3 * Copyright (c) 1996,1997
4 * Silicon Graphics Computer Systems, Inc.
5 *
6 * Copyright (c) 1997
7 * Moscow Center for SPARC Technology
8 *
9 * Copyright (c) 1999
10 * Boris Fomitchev
11 *
12 * This material is provided "as is", with absolutely no warranty expressed
13 * or implied. Any use is at your own risk.
14 *
15 * Permission to use or copy this software for any purpose is hereby granted
16 * without fee, provided the above notices are retained on all copies.
17 * Permission to modify the code and to distribute modified code is granted,
18 * provided the above notices are retained, and a notice that the code was
19 * modified is included with the above copyright notice.
20 *
21 */
22
23#include "stlport_prefix.h"
24
25#include <memory>
26
27#if defined (__GNUC__) && (defined (__CYGWIN__) || defined (__MINGW32__))
28#  include <malloc.h>
29#endif
30
31#if defined (_STLP_PTHREADS) && !defined (_STLP_NO_THREADS)
32#  include <pthread_alloc>
33#  include <cerrno>
34#endif
35
36#include <stl/_threads.h>
37
38#include "lock_free_slist.h"
39
40#if defined (__WATCOMC__)
41#  pragma warning 13 9
42#  pragma warning 367 9
43#  pragma warning 368 9
44#endif
45
46#if defined (_STLP_SGI_THREADS)
47  // We test whether threads are in use before locking.
48  // Perhaps this should be moved into stl_threads.h, but that
49  // probably makes it harder to avoid the procedure call when
50  // it isn't needed.
51extern "C" {
52  extern int __us_rsthread_malloc;
53}
54#endif
55
// Specialised debug form of the new operator which does not report "false"
// memory leaks when run with debug CRT libraries.
58#if defined (_STLP_MSVC) && (_STLP_MSVC >= 1020 && defined (_STLP_DEBUG_ALLOC)) && !defined (_STLP_WCE)
59#  include <crtdbg.h>
60inline char* __stlp_new_chunk(size_t __bytes) {
61  void *__chunk = _STLP_CHECK_NULL_ALLOC(::operator new(__bytes, __FILE__, __LINE__));
62  return __STATIC_CAST(char*, __chunk);
63}
64inline void __stlp_delete_chunck(void* __p) { ::operator delete(__p, __FILE__, __LINE__); }
65#else
66#  ifdef _STLP_NODE_ALLOC_USE_MALLOC
67#    include <cstdlib>
68inline char* __stlp_new_chunk(size_t __bytes) {
  // Do not use _STLP_CHECK_NULL_ALLOC; this macro is dedicated to the new operator.
70  void *__chunk = _STLP_VENDOR_CSTD::malloc(__bytes);
71  if (__chunk == 0) {
72    _STLP_THROW_BAD_ALLOC;
73  }
74  return __STATIC_CAST(char*, __chunk);
75}
76inline void __stlp_delete_chunck(void* __p) { _STLP_VENDOR_CSTD::free(__p); }
77#  else
78inline char* __stlp_new_chunk(size_t __bytes)
79{ return __STATIC_CAST(char*, _STLP_STD::__stl_new(__bytes)); }
80inline void __stlp_delete_chunck(void* __p) { _STLP_STD::__stl_delete(__p); }
81#  endif
82#endif
83
/* This is an additional atomic operation to the ones already defined in
 * stl/_threads.h; platforms should try to support it to improve performance.
 * __add_atomic_t _STLP_ATOMIC_ADD(volatile __add_atomic_t* __target, __add_atomic_t __val):
 * performs *__target = *__target + __val and returns the old *__target value. */
88typedef long __add_atomic_t;
89typedef unsigned long __uadd_atomic_t;
90
91#if defined (__GNUC__) && defined (__i386__)
92inline long _STLP_atomic_add_gcc_x86(long volatile* p, long addend) {
93  long result;
94  __asm__ __volatile__
95    ("lock; xaddl %1, %0;"
96    :"=m" (*p), "=r" (result)
97    :"m"  (*p), "1"  (addend)
98    :"cc");
  // xaddl leaves the previous value of *p in "result"; return it so that the
  // documented contract above (return the old *__target value) is honoured.
  return result;
100}
101#  define _STLP_ATOMIC_ADD(__dst, __val)  _STLP_atomic_add_gcc_x86(__dst, __val)
102#elif defined (_STLP_WIN32THREADS)
103// The Win32 API function InterlockedExchangeAdd is not available on Windows 95.
104#  if !defined (_STLP_WIN95_LIKE)
105#    if defined (_STLP_NEW_PLATFORM_SDK)
106#      define _STLP_ATOMIC_ADD(__dst, __val) InterlockedExchangeAdd(__dst, __val)
107#    else
108#      define _STLP_ATOMIC_ADD(__dst, __val) InterlockedExchangeAdd(__CONST_CAST(__add_atomic_t*, __dst), __val)
109#    endif
110#  endif
111#endif
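/* A hypothetical sketch (illustrative only, never compiled): on compilers that
 * provide the GCC __sync builtins, the contract above could be satisfied as
 * follows.  It is not part of the platform configuration chain above. */
#if 0
inline __add_atomic_t
_STLP_atomic_add_sketch(volatile __add_atomic_t* __target, __add_atomic_t __val)
{ return __sync_fetch_and_add(__target, __val); } // returns the old *__target value
#  define _STLP_ATOMIC_ADD(__dst, __val) _STLP_atomic_add_sketch(__dst, __val)
#endif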
112
113#if defined (__OS400__)
114// dums 02/05/2007: is it really necessary ?
115enum { _ALIGN = 16, _ALIGN_SHIFT = 4 };
116#else
117enum { _ALIGN = 2 * sizeof(void*), _ALIGN_SHIFT = 2 + sizeof(void*) / 4 };
118#endif
119
120#define _S_FREELIST_INDEX(__bytes) ((__bytes - size_t(1)) >> (int)_ALIGN_SHIFT)
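/* Worked example of the macros above (illustrative only, never compiled):
 * assuming sizeof(void*) == 8, _ALIGN is 16 and _ALIGN_SHIFT is 4, so every
 * request is rounded up to a multiple of 16 bytes and mapped onto one of the
 * 16 free lists managed below. */
#if 0
_STLP_STATIC_ASSERT(_S_FREELIST_INDEX(1)   == 0)  /* 1..16 bytes  -> list #0 */
_STLP_STATIC_ASSERT(_S_FREELIST_INDEX(25)  == 1)  /* 17..32 bytes -> list #1 */
_STLP_STATIC_ASSERT(_S_FREELIST_INDEX(32)  == 1)  /* the rounded-up size maps to the same list */
_STLP_STATIC_ASSERT(_S_FREELIST_INDEX(256) == 15) /* largest "small" request -> last list */
#endif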
121
122_STLP_BEGIN_NAMESPACE
123
124// malloc_alloc out-of-memory handling
125static __oom_handler_type __oom_handler = __STATIC_CAST(__oom_handler_type, 0);
126
127#ifdef _STLP_THREADS
128_STLP_mutex __oom_handler_lock;
129#endif
130
131void* _STLP_CALL __malloc_alloc::allocate(size_t __n)
132{
133  void *__result = malloc(__n);
134  if ( 0 == __result ) {
135    __oom_handler_type __my_malloc_handler;
136
137    for (;;) {
138      {
139#ifdef _STLP_THREADS
140        _STLP_auto_lock _l( __oom_handler_lock );
141#endif
142        __my_malloc_handler = __oom_handler;
143      }
144      if ( 0 == __my_malloc_handler) {
145        _STLP_THROW_BAD_ALLOC;
146      }
147      (*__my_malloc_handler)();
148      __result = malloc(__n);
149      if ( __result )
150        return __result;
151    }
152  }
153  return __result;
154}
155
156__oom_handler_type _STLP_CALL __malloc_alloc::set_malloc_handler(__oom_handler_type __f)
157{
158#ifdef _STLP_THREADS
159  _STLP_auto_lock _l( __oom_handler_lock );
160#endif
161  __oom_handler_type __old = __oom_handler;
162  __oom_handler = __f;
163  return __old;
164}
165
166// *******************************************************
167// Default node allocator.
168// With a reasonable compiler, this should be roughly as fast as the
169// original STL class-specific allocators, but with less fragmentation.
170//
171// Important implementation properties:
// 1. If the client requests an object of size > _MAX_BYTES, the resulting
173//    object will be obtained directly from malloc.
174// 2. In all other cases, we allocate an object of size exactly
175//    _S_round_up(requested_size).  Thus the client has enough size
176//    information that we can return the object to the proper free list
177//    without permanently losing part of the object.
178//
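// A minimal sketch of the size contract above (illustrative only, never
// compiled; it assumes the public __node_alloc facade declared in the STLport
// headers, whose members are defined near the end of this file):
#if 0
void __node_alloc_usage_sketch() {            // hypothetical function, example only
  size_t __sz = 25;                           // the caller asks for 25 bytes...
  void*  __p  = __node_alloc::allocate(__sz); // ...__sz comes back rounded up (e.g. 32)
  // Because the caller keeps the (rounded) size, the object can later be
  // returned to the proper free list without losing part of it.
  __node_alloc::deallocate(__p, __sz);

  size_t __big = 4096;                        // > _MAX_BYTES: obtained directly from the system
  void*  __q   = __node_alloc::allocate(__big);
  __node_alloc::deallocate(__q, __big);
}
#endif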
179
180#define _STLP_NFREELISTS 16
181
182#if defined (_STLP_LEAKS_PEDANTIC) && defined (_STLP_USE_DYNAMIC_LIB)
/*
 * We can only clean up the node allocator memory pool if we are sure that
 * the STLport library is used as a shared one, as this guarantees the
 * uniqueness of the node allocator instance. Without that guarantee, node
 * allocator instances might exchange memory blocks, making the implementation
 * of a cleaning process much more complicated.
 */
190#  define _STLP_DO_CLEAN_NODE_ALLOC
191#endif
192
/* When STLport is used without multi-threaded safety we use the node allocator
 * implementation with locks, as the locks become no-ops. The lock-free
 * implementation always uses system-specific atomic operations, which are
 * slower than 'normal' ones.
 */
198#if defined (_STLP_THREADS) && \
199    defined (_STLP_HAS_ATOMIC_FREELIST) && defined (_STLP_ATOMIC_ADD)
200/*
201 * We have an implementation of the atomic freelist (_STLP_atomic_freelist)
202 * for this architecture and compiler.  That means we can use the non-blocking
203 * implementation of the node-allocation engine.*/
204#  define _STLP_USE_LOCK_FREE_IMPLEMENTATION
205#endif
206
207#if !defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
208#  if defined (_STLP_THREADS)
209
210class _Node_Alloc_Lock {
211  static _STLP_STATIC_MUTEX& _S_Mutex() {
212    static _STLP_STATIC_MUTEX mutex _STLP_MUTEX_INITIALIZER;
213    return mutex;
214  }
215public:
216  _Node_Alloc_Lock() {
217#    if defined (_STLP_SGI_THREADS)
218    if (__us_rsthread_malloc)
219#    endif
220      _S_Mutex()._M_acquire_lock();
221  }
222
223  ~_Node_Alloc_Lock() {
224#    if defined (_STLP_SGI_THREADS)
225    if (__us_rsthread_malloc)
226#    endif
227      _S_Mutex()._M_release_lock();
228  }
229};
230
231#  else
232
233class _Node_Alloc_Lock {
234public:
235  _Node_Alloc_Lock() { }
236  ~_Node_Alloc_Lock() { }
237};
238
239#  endif
240
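// A free-list node: a deallocated object is reused to store the link to the
// next free object of the same size, so the free lists need no extra memory.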
241struct _Node_alloc_obj {
242  _Node_alloc_obj * _M_next;
243};
244#endif
245
246class __node_alloc_impl {
247  static inline size_t _STLP_CALL _S_round_up(size_t __bytes)
248  { return (((__bytes) + (size_t)_ALIGN-1) & ~((size_t)_ALIGN - 1)); }
249
250#if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
251  typedef _STLP_atomic_freelist::item   _Obj;
252  typedef _STLP_atomic_freelist         _Freelist;
253  typedef _STLP_atomic_freelist         _ChunkList;
254
255  // Header of blocks of memory that have been allocated as part of
256  // a larger chunk but have not yet been chopped up into nodes.
257  struct _FreeBlockHeader : public _STLP_atomic_freelist::item {
258    char* _M_end;     // pointer to end of free memory
259  };
260#else
261  typedef _Node_alloc_obj       _Obj;
262  typedef _Obj* _STLP_VOLATILE  _Freelist;
263  typedef _Obj*                 _ChunkList;
264#endif
265
266private:
267  // Returns an object of size __n, and optionally adds to size __n free list.
268  static _Obj* _S_refill(size_t __n);
269  // Allocates a chunk for nobjs of size __p_size.  nobjs may be reduced
270  // if it is inconvenient to allocate the requested number.
271  static char* _S_chunk_alloc(size_t __p_size, int& __nobjs);
272  // Chunk allocation state.
273  static _Freelist _S_free_list[_STLP_NFREELISTS];
274  // Amount of total allocated memory
275#if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
276  static _STLP_VOLATILE __add_atomic_t _S_heap_size;
277#else
278  static size_t _S_heap_size;
279#endif
280
281#if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
282  // List of blocks of free memory
283  static _STLP_atomic_freelist  _S_free_mem_blocks;
284#else
285  // Start of the current free memory buffer
286  static char* _S_start_free;
287  // End of the current free memory buffer
288  static char* _S_end_free;
289#endif
290
291#if defined (_STLP_DO_CLEAN_NODE_ALLOC)
292public:
293  // Methods to report alloc/dealloc calls to the counter system.
294#  if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
295  typedef _STLP_VOLATILE __stl_atomic_t _AllocCounter;
296#  else
297  typedef __stl_atomic_t _AllocCounter;
298#  endif
299  static _AllocCounter& _STLP_CALL _S_alloc_counter();
300  static void _S_alloc_call();
301  static void _S_dealloc_call();
302
303private:
  // Free all the allocated chunks of memory
305  static void _S_chunk_dealloc();
306  // Beginning of the linked list of allocated chunks of memory
307  static _ChunkList _S_chunks;
308#endif /* _STLP_DO_CLEAN_NODE_ALLOC */
309
310public:
311  /* __n must be > 0      */
312  static void* _M_allocate(size_t& __n);
313  /* __p may not be 0 */
314  static void _M_deallocate(void *__p, size_t __n);
315};
316
317#if !defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
318void* __node_alloc_impl::_M_allocate(size_t& __n) {
319  __n = _S_round_up(__n);
320  _Obj * _STLP_VOLATILE * __my_free_list = _S_free_list + _S_FREELIST_INDEX(__n);
321  _Obj *__r;
322
323  // Acquire the lock here with a constructor call.
  // This ensures that it is released on exit or during stack
  // unwinding.
326  _Node_Alloc_Lock __lock_instance;
327
328  if ( (__r  = *__my_free_list) != 0 ) {
329    *__my_free_list = __r->_M_next;
330  } else {
331    __r = _S_refill(__n);
332  }
333#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
334  _S_alloc_call();
335#  endif
336  // lock is released here
337  return __r;
338}
339
340void __node_alloc_impl::_M_deallocate(void *__p, size_t __n) {
341  _Obj * _STLP_VOLATILE * __my_free_list = _S_free_list + _S_FREELIST_INDEX(__n);
342  _Obj * __pobj = __STATIC_CAST(_Obj*, __p);
343
344  // acquire lock
345  _Node_Alloc_Lock __lock_instance;
346  __pobj->_M_next = *__my_free_list;
347  *__my_free_list = __pobj;
348
349#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
350  _S_dealloc_call();
351#  endif
352  // lock is released here
353}
354
355#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
356#    define _STLP_OFFSET sizeof(_Obj)
357#  else
358#    define _STLP_OFFSET 0
359#  endif
360
361/* We allocate memory in large chunks in order to avoid fragmenting     */
362/* the malloc heap too much.                                            */
363/* We assume that size is properly aligned.                             */
364/* We hold the allocation lock.                                         */
365char* __node_alloc_impl::_S_chunk_alloc(size_t _p_size, int& __nobjs) {
366  char* __result;
367  size_t __total_bytes = _p_size * __nobjs;
368  size_t __bytes_left = _S_end_free - _S_start_free;
369
370  if (__bytes_left > 0) {
371    if (__bytes_left >= __total_bytes) {
372      __result = _S_start_free;
373      _S_start_free += __total_bytes;
374      return __result;
375    }
376
377    if (__bytes_left >= _p_size) {
378      __nobjs = (int)(__bytes_left / _p_size);
379      __total_bytes = _p_size * __nobjs;
380      __result = _S_start_free;
381      _S_start_free += __total_bytes;
382      return __result;
383    }
384
385    // Try to make use of the left-over piece.
386    _Obj* _STLP_VOLATILE* __my_free_list = _S_free_list + _S_FREELIST_INDEX(__bytes_left);
387    __REINTERPRET_CAST(_Obj*, _S_start_free)->_M_next = *__my_free_list;
388    *__my_free_list = __REINTERPRET_CAST(_Obj*, _S_start_free);
389    _S_start_free = _S_end_free = 0;
390  }
391
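  // Growth heuristic: get twice what was asked for plus an amount that grows
  // with the total already obtained from the system (_S_heap_size is kept in
  // 16-byte units, see the '>> 4' further down).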
392  size_t __bytes_to_get = 2 * __total_bytes + _S_round_up(_S_heap_size) + _STLP_OFFSET;
393
394  _STLP_TRY {
395    _S_start_free = __stlp_new_chunk(__bytes_to_get);
396  }
397#if defined (_STLP_USE_EXCEPTIONS)
398  catch (const _STLP_STD::bad_alloc&) {
399    _Obj* _STLP_VOLATILE* __my_free_list;
400    _Obj* __p;
    // Try to make do with what we have.  That can't hurt.
402    // We do not try smaller requests, since that tends
403    // to result in disaster on multi-process machines.
404    for (size_t __i = _p_size; __i <= (size_t)_MAX_BYTES; __i += (size_t)_ALIGN) {
405      __my_free_list = _S_free_list + _S_FREELIST_INDEX(__i);
406      __p = *__my_free_list;
407      if (0 != __p) {
408        *__my_free_list = __p -> _M_next;
409        _S_start_free = __REINTERPRET_CAST(char*, __p);
410        _S_end_free = _S_start_free + __i;
411        return _S_chunk_alloc(_p_size, __nobjs);
412        // Any leftover piece will eventually make it to the
413        // right free list.
414      }
415    }
416    __bytes_to_get = __total_bytes + _STLP_OFFSET;
417    _S_start_free = __stlp_new_chunk(__bytes_to_get);
418  }
419#endif
420
421  _S_heap_size += __bytes_to_get >> 4;
422#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
423  __REINTERPRET_CAST(_Obj*, _S_start_free)->_M_next = _S_chunks;
424  _S_chunks = __REINTERPRET_CAST(_Obj*, _S_start_free);
425#  endif
426  _S_end_free = _S_start_free + __bytes_to_get;
427  _S_start_free += _STLP_OFFSET;
428  return _S_chunk_alloc(_p_size, __nobjs);
429}
430
431/* Returns an object of size __n, and optionally adds to size __n free list.*/
432/* We assume that __n is properly aligned.                                  */
433/* We hold the allocation lock.                                             */
434_Node_alloc_obj* __node_alloc_impl::_S_refill(size_t __n) {
435  int __nobjs = 20;
436  char* __chunk = _S_chunk_alloc(__n, __nobjs);
437
438  if (1 == __nobjs) return __REINTERPRET_CAST(_Obj*, __chunk);
439
440  _Obj* _STLP_VOLATILE* __my_free_list = _S_free_list + _S_FREELIST_INDEX(__n);
441  _Obj* __result;
442  _Obj* __current_obj;
443  _Obj* __next_obj;
444
445  /* Build free list in chunk */
446  __result = __REINTERPRET_CAST(_Obj*, __chunk);
447  *__my_free_list = __next_obj = __REINTERPRET_CAST(_Obj*, __chunk + __n);
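  // The first object is returned to the caller; the remaining __nobjs - 1
  // objects are chained into the free list by the loop below (the last
  // node's link is terminated just after the loop).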
448  for (--__nobjs; --__nobjs; ) {
449    __current_obj = __next_obj;
450    __next_obj = __REINTERPRET_CAST(_Obj*, __REINTERPRET_CAST(char*, __next_obj) + __n);
451    __current_obj->_M_next = __next_obj;
452  }
453  __next_obj->_M_next = 0;
454  return __result;
455}
456
457#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
458void __node_alloc_impl::_S_alloc_call()
459{ ++_S_alloc_counter(); }
460
461void __node_alloc_impl::_S_dealloc_call() {
462  __stl_atomic_t &counter = _S_alloc_counter();
463  if (--counter == 0)
464  { _S_chunk_dealloc(); }
465}
466
467/* We deallocate all the memory chunks      */
468void __node_alloc_impl::_S_chunk_dealloc() {
469  _Obj *__pcur = _S_chunks, *__pnext;
470  while (__pcur != 0) {
471    __pnext = __pcur->_M_next;
472    __stlp_delete_chunck(__pcur);
473    __pcur = __pnext;
474  }
475  _S_chunks = 0;
476  _S_start_free = _S_end_free = 0;
477  _S_heap_size = 0;
478  memset(__REINTERPRET_CAST(char*, __CONST_CAST(_Obj**, &_S_free_list[0])), 0, _STLP_NFREELISTS * sizeof(_Obj*));
479}
480#  endif
481
482#else
483
484void* __node_alloc_impl::_M_allocate(size_t& __n) {
485  __n = _S_round_up(__n);
486  _Obj* __r = _S_free_list[_S_FREELIST_INDEX(__n)].pop();
487  if (__r  == 0)
488  { __r = _S_refill(__n); }
489
490#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
491  _S_alloc_call();
492#  endif
493  return __r;
494}
495
496void __node_alloc_impl::_M_deallocate(void *__p, size_t __n) {
497  _S_free_list[_S_FREELIST_INDEX(__n)].push(__STATIC_CAST(_Obj*, __p));
498
499#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
500  _S_dealloc_call();
501#  endif
502}
503
504/* Returns an object of size __n, and optionally adds additional ones to    */
505/* freelist of objects of size __n.                                         */
506/* We assume that __n is properly aligned.                                  */
507__node_alloc_impl::_Obj* __node_alloc_impl::_S_refill(size_t __n) {
508  int __nobjs = 20;
509  char* __chunk = _S_chunk_alloc(__n, __nobjs);
510
511  if (__nobjs <= 1)
512    return __REINTERPRET_CAST(_Obj*, __chunk);
513
514  // Push all new nodes (minus first one) onto freelist
515  _Obj* __result   = __REINTERPRET_CAST(_Obj*, __chunk);
516  _Obj* __cur_item = __result;
517  _Freelist* __my_freelist = _S_free_list + _S_FREELIST_INDEX(__n);
518  for (--__nobjs; __nobjs != 0; --__nobjs) {
519    __cur_item  = __REINTERPRET_CAST(_Obj*, __REINTERPRET_CAST(char*, __cur_item) + __n);
520    __my_freelist->push(__cur_item);
521  }
522  return __result;
523}
524
525#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
526#    define _STLP_OFFSET _ALIGN
527#  else
528#    define _STLP_OFFSET 0
529#  endif
530
531/* We allocate memory in large chunks in order to avoid fragmenting     */
532/* the malloc heap too much.                                            */
533/* We assume that size is properly aligned.                             */
534char* __node_alloc_impl::_S_chunk_alloc(size_t _p_size, int& __nobjs) {
535#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
  // We are going to add a small memory block to keep track of all the allocated
  // blocks' addresses; we need to do so while respecting the memory alignment.
  // The following static assertion checks that the reserved block is big enough
  // to store a pointer.
539  _STLP_STATIC_ASSERT(sizeof(_Obj) <= _ALIGN)
540#  endif
541  char*  __result       = 0;
542  __add_atomic_t __total_bytes  = __STATIC_CAST(__add_atomic_t, _p_size) * __nobjs;
543
544  _FreeBlockHeader* __block = __STATIC_CAST(_FreeBlockHeader*, _S_free_mem_blocks.pop());
545  if (__block != 0) {
    // We checked a block out and can now mess with it with impunity.
547    // We'll put the remainder back into the list if we're done with it below.
548    char*  __buf_start  = __REINTERPRET_CAST(char*, __block);
549    __add_atomic_t __bytes_left = __block->_M_end - __buf_start;
550
551    if ((__bytes_left < __total_bytes) && (__bytes_left >= __STATIC_CAST(__add_atomic_t, _p_size))) {
552      // There's enough left for at least one object, but not as much as we wanted
553      __result      = __buf_start;
554      __nobjs       = (int)(__bytes_left/_p_size);
555      __total_bytes = __STATIC_CAST(__add_atomic_t, _p_size) * __nobjs;
556      __bytes_left -= __total_bytes;
557      __buf_start  += __total_bytes;
558    }
559    else if (__bytes_left >= __total_bytes) {
560      // The block has enough left to satisfy all that was asked for
561      __result      = __buf_start;
562      __bytes_left -= __total_bytes;
563      __buf_start  += __total_bytes;
564    }
565
566    if (__bytes_left != 0) {
567      // There is still some memory left over in block after we satisfied our request.
568      if ((__result != 0) && (__bytes_left >= (__add_atomic_t)sizeof(_FreeBlockHeader))) {
569        // We were able to allocate at least one object and there is still enough
570        // left to put remainder back into list.
571        _FreeBlockHeader* __newblock = __REINTERPRET_CAST(_FreeBlockHeader*, __buf_start);
572        __newblock->_M_end  = __block->_M_end;
573        _S_free_mem_blocks.push(__newblock);
574      }
575      else {
576        // We were not able to allocate enough for at least one object.
577        // Shove into freelist of nearest (rounded-down!) size.
578        size_t __rounded_down = _S_round_up(__bytes_left + 1) - (size_t)_ALIGN;
579        if (__rounded_down > 0)
580          _S_free_list[_S_FREELIST_INDEX(__rounded_down)].push((_Obj*)__buf_start);
581      }
582    }
583    if (__result != 0)
584      return __result;
585  }
586
587  // We couldn't satisfy it from the list of free blocks, get new memory.
588  __add_atomic_t __bytes_to_get = 2 * __total_bytes +
589                                  __STATIC_CAST(__add_atomic_t,
590                                                _S_round_up(__STATIC_CAST(__uadd_atomic_t, _STLP_ATOMIC_ADD(&_S_heap_size, 0)))) +
591                                  _STLP_OFFSET;
592  _STLP_TRY {
593    __result = __stlp_new_chunk(__bytes_to_get);
594  }
595#if defined (_STLP_USE_EXCEPTIONS)
596  catch (const bad_alloc&) {
    // Allocation failed; try to cannibalize from a freelist of a larger object size.
598    for (size_t __i = _p_size; __i <= (size_t)_MAX_BYTES; __i += (size_t)_ALIGN) {
599      _Obj* __p  = _S_free_list[_S_FREELIST_INDEX(__i)].pop();
600      if (0 != __p) {
601        if (__i < sizeof(_FreeBlockHeader)) {
602          // Not enough to put into list of free blocks, divvy it up here.
603          // Use as much as possible for this request and shove remainder into freelist.
604          __nobjs = (int)(__i/_p_size);
605          __total_bytes = __nobjs * __STATIC_CAST(__add_atomic_t, _p_size);
606          size_t __bytes_left = __i - __total_bytes;
607          size_t __rounded_down = _S_round_up(__bytes_left+1) - (size_t)_ALIGN;
608          if (__rounded_down > 0) {
609            _S_free_list[_S_FREELIST_INDEX(__rounded_down)].push(__REINTERPRET_CAST(_Obj*, __REINTERPRET_CAST(char*, __p) + __total_bytes));
610          }
611          return __REINTERPRET_CAST(char*, __p);
612        }
613        else {
614          // Add node to list of available blocks and recursively allocate from it.
615          _FreeBlockHeader* __newblock = (_FreeBlockHeader*)__p;
616          __newblock->_M_end  = __REINTERPRET_CAST(char*, __p) + __i;
617          _S_free_mem_blocks.push(__newblock);
618          return _S_chunk_alloc(_p_size, __nobjs);
619        }
620      }
621    }
622
623    // We were not able to find something in a freelist, try to allocate a smaller amount.
624    __bytes_to_get  = __total_bytes + _STLP_OFFSET;
625    __result = __stlp_new_chunk(__bytes_to_get);
626
627    // This should either throw an exception or remedy the situation.
628    // Thus we assume it succeeded.
629  }
630#endif
631  // Alignment check
632  _STLP_VERBOSE_ASSERT(((__REINTERPRET_CAST(size_t, __result) & __STATIC_CAST(size_t, _ALIGN - 1)) == 0),
633                       _StlMsg_DBA_DELETED_TWICE)
634  _STLP_ATOMIC_ADD(&_S_heap_size, __bytes_to_get >> 4);
635
636#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
637  // We have to track the allocated memory chunks for release on exit.
638  _S_chunks.push(__REINTERPRET_CAST(_Obj*, __result));
639  __result       += _ALIGN;
640  __bytes_to_get -= _ALIGN;
641#  endif
642
643  if (__bytes_to_get > __total_bytes) {
644    // Push excess memory allocated in this chunk into list of free memory blocks
645    _FreeBlockHeader* __freeblock = __REINTERPRET_CAST(_FreeBlockHeader*, __result + __total_bytes);
646    __freeblock->_M_end  = __result + __bytes_to_get;
647    _S_free_mem_blocks.push(__freeblock);
648  }
649  return __result;
650}
651
652#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
653void __node_alloc_impl::_S_alloc_call()
654{ _STLP_ATOMIC_INCREMENT(&_S_alloc_counter()); }
655
656void __node_alloc_impl::_S_dealloc_call() {
657  _STLP_VOLATILE __stl_atomic_t *pcounter = &_S_alloc_counter();
658  if (_STLP_ATOMIC_DECREMENT(pcounter) == 0)
659    _S_chunk_dealloc();
660}
661
662/* We deallocate all the memory chunks      */
663void __node_alloc_impl::_S_chunk_dealloc() {
  // Note: The __node_alloc_cleaner class ensures that this function
  // will only be called when the (shared) library is unloaded or the
  // process is shut down.  It's thus not possible that another thread
  // is currently trying to allocate a node (we're not thread-safe here).
668  //
669
  // Clear the free blocks and all free lists.  This makes sure that, if for
  // some reason more memory is allocated again during shutdown, it does not
  // come from the chunks we are about to release (it'd also be really nasty
  // to leave references to deallocated memory).
673  _S_free_mem_blocks.clear();
674  _S_heap_size      = 0;
675
676  for (size_t __i = 0; __i < _STLP_NFREELISTS; ++__i) {
677    _S_free_list[__i].clear();
678  }
679
680  // Detach list of chunks and free them all
681  _Obj* __chunk = _S_chunks.clear();
682  while (__chunk != 0) {
683    _Obj* __next = __chunk->_M_next;
684    __stlp_delete_chunck(__chunk);
685    __chunk  = __next;
686  }
687}
688#  endif
689
690#endif
691
692#if defined (_STLP_DO_CLEAN_NODE_ALLOC)
693struct __node_alloc_cleaner {
694  ~__node_alloc_cleaner()
695  { __node_alloc_impl::_S_dealloc_call(); }
696};
697
698#  if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
699_STLP_VOLATILE __stl_atomic_t& _STLP_CALL
700#  else
701__stl_atomic_t& _STLP_CALL
702#  endif
703__node_alloc_impl::_S_alloc_counter() {
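  // _S_counter starts at 1 so that the memory pool is only released once the
  // static __node_alloc_cleaner below has been destroyed, not whenever the
  // number of outstanding allocations transiently drops to zero at run time.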
704  static _AllocCounter _S_counter = 1;
705  static __node_alloc_cleaner _S_node_alloc_cleaner;
706  return _S_counter;
707}
708#endif
709
710#if !defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
711_Node_alloc_obj * _STLP_VOLATILE
712__node_alloc_impl::_S_free_list[_STLP_NFREELISTS]
713= {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
714// The 16 zeros are necessary to make version 4.1 of the SunPro
715// compiler happy.  Otherwise it appears to allocate too little
716// space for the array.
717#else
718_STLP_atomic_freelist __node_alloc_impl::_S_free_list[_STLP_NFREELISTS];
719_STLP_atomic_freelist __node_alloc_impl::_S_free_mem_blocks;
720#endif
721
722#if !defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
723char *__node_alloc_impl::_S_start_free = 0;
724char *__node_alloc_impl::_S_end_free = 0;
725#endif
726
727#if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
728_STLP_VOLATILE __add_atomic_t
729#else
730size_t
731#endif
732__node_alloc_impl::_S_heap_size = 0;
733
734#if defined (_STLP_DO_CLEAN_NODE_ALLOC)
735#  if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
736_STLP_atomic_freelist __node_alloc_impl::_S_chunks;
737#  else
738_Node_alloc_obj* __node_alloc_impl::_S_chunks  = 0;
739#  endif
740#endif
741
742void * _STLP_CALL __node_alloc::_M_allocate(size_t& __n)
743{ return __node_alloc_impl::_M_allocate(__n); }
744
745void _STLP_CALL __node_alloc::_M_deallocate(void *__p, size_t __n)
746{ __node_alloc_impl::_M_deallocate(__p, __n); }
747
748#if defined (_STLP_PTHREADS) && !defined (_STLP_NO_THREADS)
749
750#  define _STLP_DATA_ALIGNMENT 8
751
752_STLP_MOVE_TO_PRIV_NAMESPACE
753
754// *******************************************************
755// __perthread_alloc implementation
756union _Pthread_alloc_obj {
757  union _Pthread_alloc_obj * __free_list_link;
758  char __client_data[_STLP_DATA_ALIGNMENT];    /* The client sees this.    */
759};
760
761// Pthread allocators don't appear to the client to have meaningful
762// instances.  We do in fact need to associate some state with each
763// thread.  That state is represented by _Pthread_alloc_per_thread_state.
764
765struct _Pthread_alloc_per_thread_state {
766  typedef _Pthread_alloc_obj __obj;
767  enum { _S_NFREELISTS = _MAX_BYTES / _STLP_DATA_ALIGNMENT };
768
  _Pthread_alloc_per_thread_state() : __next(0)
  { memset((void *)__CONST_CAST(_Pthread_alloc_obj**, __free_list), 0, (size_t)_S_NFREELISTS * sizeof(__obj *)); }
  // Returns an object of size __n, and possibly adds objects to the size __n free list.
  void *_M_refill(size_t __n);

  _Pthread_alloc_obj* volatile __free_list[_S_NFREELISTS];
  // Free list link for the list of available per thread structures.
  // When one of these becomes available for reuse due to thread
  // termination, any objects in its free list remain associated
  // with it.  The whole structure may then be used by a newly
  // created thread.
  _Pthread_alloc_per_thread_state *__next;
781  // this data member is only to be used by per_thread_allocator, which returns memory to the originating thread.
782  _STLP_mutex _M_lock;
783};
784
785// Pthread-specific allocator.
786class _Pthread_alloc_impl {
787public: // but only for internal use:
788  typedef _Pthread_alloc_per_thread_state __state_type;
789  typedef char value_type;
790
  // Allocates a chunk for nobjs of size __size.  nobjs may be reduced
792  // if it is inconvenient to allocate the requested number.
793  static char *_S_chunk_alloc(size_t __size, size_t &__nobjs, __state_type*);
794
795  enum {_S_ALIGN = _STLP_DATA_ALIGNMENT};
796
797  static size_t _S_round_up(size_t __bytes)
798  { return (((__bytes) + (int)_S_ALIGN - 1) & ~((int)_S_ALIGN - 1)); }
799  static size_t _S_freelist_index(size_t __bytes)
800  { return (((__bytes) + (int)_S_ALIGN - 1) / (int)_S_ALIGN - 1); }
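  // Worked example (illustrative): with _STLP_DATA_ALIGNMENT == 8, a request
  // of 10 bytes is rounded up to 16 by _S_round_up and lands in free list
  // index (10 + 8 - 1) / 8 - 1 == 1, i.e. the list of 16-byte objects.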
801
802private:
803  // Chunk allocation state. And other shared state.
804  // Protected by _S_chunk_allocator_lock.
805  static _STLP_STATIC_MUTEX _S_chunk_allocator_lock;
806  static char *_S_start_free;
807  static char *_S_end_free;
808  static size_t _S_heap_size;
  // Allocator instances that are currently unclaimed by any thread.
  static __state_type *_S_free_per_thread_states;
  // Pthread key under which per thread state is stored.
  static pthread_key_t _S_key;
  static bool _S_key_initialized;
  // Function to be called on thread exit to reclaim per thread
  // state.
  static void _S_destructor(void *instance);
  static __state_type *_S_new_per_thread_state();
818public:
  // Return a recycled or new per thread state; ensures that the current
  // thread has an associated per thread state.
  static __state_type *_S_get_per_thread_state();
private:
824  class _M_lock;
825  friend class _M_lock;
826  class _M_lock {
827  public:
828    _M_lock () { _S_chunk_allocator_lock._M_acquire_lock(); }
829    ~_M_lock () { _S_chunk_allocator_lock._M_release_lock(); }
830  };
831
832public:
833
834  /* n must be > 0      */
835  static void * allocate(size_t& __n);
836
837  /* p may not be 0 */
838  static void deallocate(void *__p, size_t __n);
839
840  // boris : versions for per_thread_allocator
841  /* n must be > 0      */
842  static void * allocate(size_t& __n, __state_type* __a);
843
844  /* p may not be 0 */
845  static void deallocate(void *__p, size_t __n, __state_type* __a);
846
847  static void * reallocate(void *__p, size_t __old_sz, size_t& __new_sz);
848};
849
850/* Returns an object of size n, and optionally adds to size n free list.*/
851/* We assume that n is properly aligned.                                */
852/* We hold the allocation lock.                                         */
853void *_Pthread_alloc_per_thread_state::_M_refill(size_t __n) {
854  typedef _Pthread_alloc_obj __obj;
855  size_t __nobjs = 128;
856  char * __chunk = _Pthread_alloc_impl::_S_chunk_alloc(__n, __nobjs, this);
857  __obj * volatile * __my_free_list;
858  __obj * __result;
859  __obj * __current_obj, * __next_obj;
860  size_t __i;
861
862  if (1 == __nobjs)  {
863    return __chunk;
864  }
865
866  __my_free_list = __free_list + _Pthread_alloc_impl::_S_freelist_index(__n);
867
868  /* Build free list in chunk */
869  __result = (__obj *)__chunk;
870  *__my_free_list = __next_obj = (__obj *)(__chunk + __n);
871  for (__i = 1; ; ++__i) {
872    __current_obj = __next_obj;
873    __next_obj = (__obj *)((char *)__next_obj + __n);
874    if (__nobjs - 1 == __i) {
875      __current_obj -> __free_list_link = 0;
876      break;
877    } else {
878      __current_obj -> __free_list_link = __next_obj;
879    }
880  }
881  return __result;
882}
883
884void _Pthread_alloc_impl::_S_destructor(void *__instance) {
885  _M_lock __lock_instance;  // Need to acquire lock here.
886  _Pthread_alloc_per_thread_state* __s = (_Pthread_alloc_per_thread_state*)__instance;
887  __s -> __next = _S_free_per_thread_states;
888  _S_free_per_thread_states = __s;
889}
890
891_Pthread_alloc_per_thread_state* _Pthread_alloc_impl::_S_new_per_thread_state() {
892  /* lock already held here.  */
893  if (0 != _S_free_per_thread_states) {
894    _Pthread_alloc_per_thread_state *__result = _S_free_per_thread_states;
895    _S_free_per_thread_states = _S_free_per_thread_states -> __next;
896    return __result;
897  }
898  else {
899    return new _Pthread_alloc_per_thread_state;
900  }
901}
902
903_Pthread_alloc_per_thread_state* _Pthread_alloc_impl::_S_get_per_thread_state() {
904  int __ret_code;
905  __state_type* __result;
906
907  if (_S_key_initialized && (__result = (__state_type*) pthread_getspecific(_S_key)))
908    return __result;
909
910  /*REFERENCED*/
911  _M_lock __lock_instance;  // Need to acquire lock here.
912  if (!_S_key_initialized) {
913    if (pthread_key_create(&_S_key, _S_destructor)) {
914      _STLP_THROW_BAD_ALLOC;  // failed
915    }
916    _S_key_initialized = true;
917  }
918
919  __result = _S_new_per_thread_state();
920  __ret_code = pthread_setspecific(_S_key, __result);
921  if (__ret_code) {
922    if (__ret_code == ENOMEM) {
923      _STLP_THROW_BAD_ALLOC;
924    } else {
      // EINVAL
926      _STLP_ABORT();
927    }
928  }
929  return __result;
930}
931
932/* We allocate memory in large chunks in order to avoid fragmenting     */
933/* the malloc heap too much.                                            */
934/* We assume that size is properly aligned.                             */
935char *_Pthread_alloc_impl::_S_chunk_alloc(size_t __p_size, size_t &__nobjs, _Pthread_alloc_per_thread_state *__a) {
936  typedef _Pthread_alloc_obj __obj;
937  {
938    char * __result;
939    size_t __total_bytes;
940    size_t __bytes_left;
941    /*REFERENCED*/
942    _M_lock __lock_instance;         // Acquire lock for this routine
943
944    __total_bytes = __p_size * __nobjs;
945    __bytes_left = _S_end_free - _S_start_free;
946    if (__bytes_left >= __total_bytes) {
947      __result = _S_start_free;
948      _S_start_free += __total_bytes;
949      return __result;
950    } else if (__bytes_left >= __p_size) {
951      __nobjs = __bytes_left/__p_size;
952      __total_bytes = __p_size * __nobjs;
953      __result = _S_start_free;
954      _S_start_free += __total_bytes;
955      return __result;
956    } else {
957      size_t __bytes_to_get = 2 * __total_bytes + _S_round_up(_S_heap_size);
958      // Try to make use of the left-over piece.
959      if (__bytes_left > 0) {
960        __obj * volatile * __my_free_list = __a->__free_list + _S_freelist_index(__bytes_left);
961        ((__obj *)_S_start_free) -> __free_list_link = *__my_free_list;
962        *__my_free_list = (__obj *)_S_start_free;
963      }
964#  ifdef _SGI_SOURCE
965      // Try to get memory that's aligned on something like a
966      // cache line boundary, so as to avoid parceling out
967      // parts of the same line to different threads and thus
968      // possibly different processors.
969      {
970        const int __cache_line_size = 128;  // probable upper bound
971        __bytes_to_get &= ~(__cache_line_size-1);
972        _S_start_free = (char *)memalign(__cache_line_size, __bytes_to_get);
973        if (0 == _S_start_free) {
974          _S_start_free = (char *)__malloc_alloc::allocate(__bytes_to_get);
975        }
976      }
#  else  /* !_SGI_SOURCE */
978      _S_start_free = (char *)__malloc_alloc::allocate(__bytes_to_get);
979#  endif
980      _S_heap_size += __bytes_to_get >> 4;
981      _S_end_free = _S_start_free + __bytes_to_get;
982    }
983  }
984  // lock is released here
985  return _S_chunk_alloc(__p_size, __nobjs, __a);
986}
987
988
989/* n must be > 0      */
990void *_Pthread_alloc_impl::allocate(size_t& __n) {
991  typedef _Pthread_alloc_obj __obj;
992  __obj * volatile * __my_free_list;
993  __obj * __result;
994  __state_type* __a;
995
996  if (__n > _MAX_BYTES) {
997    return __malloc_alloc::allocate(__n);
998  }
999
1000  __n = _S_round_up(__n);
1001  __a = _S_get_per_thread_state();
1002
1003  __my_free_list = __a->__free_list + _S_freelist_index(__n);
1004  __result = *__my_free_list;
1005  if (__result == 0) {
1006    void *__r = __a->_M_refill(__n);
1007    return __r;
1008  }
1009  *__my_free_list = __result->__free_list_link;
1010  return __result;
}
1012
1013/* p may not be 0 */
1014void _Pthread_alloc_impl::deallocate(void *__p, size_t __n) {
1015  typedef _Pthread_alloc_obj __obj;
1016  __obj *__q = (__obj *)__p;
1017  __obj * volatile * __my_free_list;
1018  __state_type* __a;
1019
1020  if (__n > _MAX_BYTES) {
1021      __malloc_alloc::deallocate(__p, __n);
1022      return;
1023  }
1024
1025  __a = _S_get_per_thread_state();
1026
1027  __my_free_list = __a->__free_list + _S_freelist_index(__n);
1028  __q -> __free_list_link = *__my_free_list;
1029  *__my_free_list = __q;
1030}
1031
1032// boris : versions for per_thread_allocator
1033/* n must be > 0      */
1034void *_Pthread_alloc_impl::allocate(size_t& __n, __state_type* __a) {
1035  typedef _Pthread_alloc_obj __obj;
1036  __obj * volatile * __my_free_list;
1037  __obj * __result;
1038
1039  if (__n > _MAX_BYTES) {
1040    return __malloc_alloc::allocate(__n);
1041  }
1042  __n = _S_round_up(__n);
1043
  // boris : here, we have to lock per thread state, as we may be getting memory
  // from a different thread's pool.
1046  _STLP_auto_lock __lock(__a->_M_lock);
1047
1048  __my_free_list = __a->__free_list + _S_freelist_index(__n);
1049  __result = *__my_free_list;
1050  if (__result == 0) {
1051    void *__r = __a->_M_refill(__n);
1052    return __r;
1053  }
1054  *__my_free_list = __result->__free_list_link;
1055  return __result;
}
1057
1058/* p may not be 0 */
1059void _Pthread_alloc_impl::deallocate(void *__p, size_t __n, __state_type* __a) {
1060  typedef _Pthread_alloc_obj __obj;
1061  __obj *__q = (__obj *)__p;
1062  __obj * volatile * __my_free_list;
1063
1064  if (__n > _MAX_BYTES) {
1065    __malloc_alloc::deallocate(__p, __n);
1066    return;
1067  }
1068
  // boris : here, we have to lock per thread state, as we may be returning memory
  // from a different thread.
1071  _STLP_auto_lock __lock(__a->_M_lock);
1072
1073  __my_free_list = __a->__free_list + _S_freelist_index(__n);
1074  __q -> __free_list_link = *__my_free_list;
1075  *__my_free_list = __q;
1076}
1077
1078void *_Pthread_alloc_impl::reallocate(void *__p, size_t __old_sz, size_t& __new_sz) {
1079  void * __result;
1080  size_t __copy_sz;
1081
1082  if (__old_sz > _MAX_BYTES && __new_sz > _MAX_BYTES) {
1083    return realloc(__p, __new_sz);
1084  }
1085
1086  if (_S_round_up(__old_sz) == _S_round_up(__new_sz)) return __p;
1087  __result = allocate(__new_sz);
1088  __copy_sz = __new_sz > __old_sz? __old_sz : __new_sz;
1089  memcpy(__result, __p, __copy_sz);
1090  deallocate(__p, __old_sz);
1091  return __result;
1092}
1093
1094_Pthread_alloc_per_thread_state* _Pthread_alloc_impl::_S_free_per_thread_states = 0;
1095pthread_key_t _Pthread_alloc_impl::_S_key = 0;
1096_STLP_STATIC_MUTEX _Pthread_alloc_impl::_S_chunk_allocator_lock _STLP_MUTEX_INITIALIZER;
1097bool _Pthread_alloc_impl::_S_key_initialized = false;
1098char *_Pthread_alloc_impl::_S_start_free = 0;
1099char *_Pthread_alloc_impl::_S_end_free = 0;
1100size_t _Pthread_alloc_impl::_S_heap_size = 0;
1101
1102void * _STLP_CALL _Pthread_alloc::allocate(size_t& __n)
1103{ return _Pthread_alloc_impl::allocate(__n); }
1104void _STLP_CALL _Pthread_alloc::deallocate(void *__p, size_t __n)
1105{ _Pthread_alloc_impl::deallocate(__p, __n); }
1106void * _STLP_CALL _Pthread_alloc::allocate(size_t& __n, __state_type* __a)
1107{ return _Pthread_alloc_impl::allocate(__n, __a); }
1108void _STLP_CALL _Pthread_alloc::deallocate(void *__p, size_t __n, __state_type* __a)
1109{ _Pthread_alloc_impl::deallocate(__p, __n, __a); }
1110void * _STLP_CALL _Pthread_alloc::reallocate(void *__p, size_t __old_sz, size_t& __new_sz)
1111{ return _Pthread_alloc_impl::reallocate(__p, __old_sz, __new_sz); }
1112_Pthread_alloc_per_thread_state* _STLP_CALL _Pthread_alloc::_S_get_per_thread_state()
1113{ return _Pthread_alloc_impl::_S_get_per_thread_state(); }
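/* A minimal usage sketch of the facade above (illustrative only, never
 * compiled).  The overloads taking a per thread state are the ones used by
 * per_thread_allocator so that memory goes back to the pool it came from: */
#if 0
void __pthread_alloc_usage_sketch() {                 // hypothetical function, example only
  size_t __sz = 24;                                    // rounded up to a multiple of 8
  void*  __p  = _Pthread_alloc::allocate(__sz);        // served from the calling thread's free lists
  _Pthread_alloc::deallocate(__p, __sz);               // returned to the calling thread's free lists

  // Keeping the originating state allows another thread to hand the memory
  // back to the pool it was taken from.
  _Pthread_alloc_per_thread_state* __state = _Pthread_alloc::_S_get_per_thread_state();
  void* __q = _Pthread_alloc::allocate(__sz, __state);
  _Pthread_alloc::deallocate(__q, __sz, __state);
}
#endif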
1114
1115_STLP_MOVE_TO_STD_NAMESPACE
1116
1117#endif
1118
1119_STLP_END_NAMESPACE
1120
1121#undef _S_FREELIST_INDEX
1122