atomic.h revision 051eae8cc591dfa2955cbfa73aae79ab53620c08
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

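/*
 * Atomic reads are expressed as atomic adds of zero, so they go through the
 * same primitives as the read-modify-write operations rather than plain
 * loads.
 */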
#define	atomic_read_uint64(p)	atomic_add_uint64(p, 0)
#define	atomic_read_uint32(p)	atomic_add_uint32(p, 0)
#define	atomic_read_p(p)	atomic_add_p(p, NULL)
#define	atomic_read_z(p)	atomic_add_z(p, 0)
#define	atomic_read_u(p)	atomic_add_u(p, 0)

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

/*
 * All arithmetic functions return the arithmetic result of the atomic
 * operation.  Some atomic operation APIs return the value prior to mutation, in
 * which case the following functions must redundantly compute the result so
 * that it can be returned.  These functions are normally inlined, so the extra
 * operations can be optimized away if the return values aren't used by the
 * callers.
 *
 *   <t> atomic_read_<t>(<t> *p) { return (*p); }
 *   <t> atomic_add_<t>(<t> *p, <t> x) { return (*p + x); }
 *   <t> atomic_sub_<t>(<t> *p, <t> x) { return (*p - x); }
 *   bool atomic_cas_<t>(<t> *p, <t> c, <t> s)
 *   {
 *     if (*p != c)
 *       return (true);
 *     *p = s;
 *     return (false);
 *   }
 *   void atomic_write_<t>(<t> *p, <t> x) { *p = x; }
 */
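
/*
 * Illustrative usage sketch (not part of this header): obj, its refcount and
 * flags fields, and FLAG_DIRTY are hypothetical.  atomic_add_*() returns the
 * post-add value, and atomic_cas_*() returns false on success, so a typical
 * retry loop spins while the CAS returns true:
 *
 *   uint64_t refs = atomic_add_uint64(&obj->refcount, 1);
 *
 *   uint64_t old;
 *   do {
 *       old = atomic_read_uint64(&obj->flags);
 *   } while (atomic_cas_uint64(&obj->flags, old, old | FLAG_DIRTY));
 */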

#ifndef JEMALLOC_ENABLE_INLINE
uint64_t	atomic_add_uint64(uint64_t *p, uint64_t x);
uint64_t	atomic_sub_uint64(uint64_t *p, uint64_t x);
bool	atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s);
void	atomic_write_uint64(uint64_t *p, uint64_t x);
uint32_t	atomic_add_uint32(uint32_t *p, uint32_t x);
uint32_t	atomic_sub_uint32(uint32_t *p, uint32_t x);
bool	atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s);
void	atomic_write_uint32(uint32_t *p, uint32_t x);
void	*atomic_add_p(void **p, void *x);
void	*atomic_sub_p(void **p, void *x);
bool	atomic_cas_p(void **p, void *c, void *s);
void	atomic_write_p(void **p, void *x);
size_t	atomic_add_z(size_t *p, size_t x);
size_t	atomic_sub_z(size_t *p, size_t x);
bool	atomic_cas_z(size_t *p, size_t c, size_t s);
void	atomic_write_z(size_t *p, size_t x);
unsigned	atomic_add_u(unsigned *p, unsigned x);
unsigned	atomic_sub_u(unsigned *p, unsigned x);
bool	atomic_cas_u(unsigned *p, unsigned c, unsigned s);
void	atomic_write_u(unsigned *p, unsigned x);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
/******************************************************************************/
/* 64-bit operations. */
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
#  if (defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
	uint64_t t = x;

	asm volatile (
	    "lock; xaddq %0, %1;"
	    : "+r" (t), "=m" (*p) /* Outputs. */
	    : "m" (*p) /* Inputs. */
	    );

	return (t + x);
}

JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
	uint64_t t;

	x = (uint64_t)(-(int64_t)x);
	t = x;
	asm volatile (
	    "lock; xaddq %0, %1;"
	    : "+r" (t), "=m" (*p) /* Outputs. */
	    : "m" (*p) /* Inputs. */
	    );

	return (t + x);
}

JEMALLOC_INLINE bool
atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
{
	uint8_t success;

	asm volatile (
	    "lock; cmpxchgq %4, %0;"
	    "sete %1;"
	    : "=m" (*p), "=a" (success) /* Outputs. */
	    : "m" (*p), "a" (c), "r" (s) /* Inputs. */
	    : "memory" /* Clobbers. */
	    );

	return (!(bool)success);
}

JEMALLOC_INLINE void
atomic_write_uint64(uint64_t *p, uint64_t x)
{

	asm volatile (
	    "xchgq %1, %0;" /* Lock is implied by xchgq. */
	    : "=m" (*p), "+r" (x) /* Outputs. */
	    : "m" (*p) /* Inputs. */
	    : "memory" /* Clobbers. */
	    );
}
#  elif (defined(JEMALLOC_C11ATOMICS))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
	volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
	return (atomic_fetch_add(a, x) + x);
}

JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
	volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
	return (atomic_fetch_sub(a, x) - x);
}

JEMALLOC_INLINE bool
atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
{
	/* C11 requires an _Atomic object; cast as the add/sub variants do. */
	volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
	return (!atomic_compare_exchange_strong(a, &c, s));
}

JEMALLOC_INLINE void
atomic_write_uint64(uint64_t *p, uint64_t x)
{
	volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
	atomic_store(a, x);
}
#  elif (defined(JEMALLOC_ATOMIC9))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{

	/*
	 * atomic_fetchadd_64() doesn't exist, but we only ever use this
	 * function on LP64 systems, so atomic_fetchadd_long() will do.
	 */
	assert(sizeof(uint64_t) == sizeof(unsigned long));

	return (atomic_fetchadd_long(p, (unsigned long)x) + x);
}

JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{

	assert(sizeof(uint64_t) == sizeof(unsigned long));

	return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x);
}

JEMALLOC_INLINE bool
atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
{

	assert(sizeof(uint64_t) == sizeof(unsigned long));

	return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s));
}

JEMALLOC_INLINE void
atomic_write_uint64(uint64_t *p, uint64_t x)
{

	assert(sizeof(uint64_t) == sizeof(unsigned long));

	atomic_store_rel_long(p, x);
}
#  elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{

	return (OSAtomicAdd64((int64_t)x, (int64_t *)p));
}

JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{

	return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
}

JEMALLOC_INLINE bool
atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
{

	return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p));
}

JEMALLOC_INLINE void
atomic_write_uint64(uint64_t *p, uint64_t x)
{
	uint64_t o;

	/* The documented OSAtomic*() API does not expose an atomic exchange. */
	do {
		o = atomic_read_uint64(p);
	} while (atomic_cas_uint64(p, o, x));
}
#  elif (defined(_MSC_VER))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{

	return (InterlockedExchangeAdd64(p, x) + x);
}

JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{

	return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x);
}

JEMALLOC_INLINE bool
atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
{
	uint64_t o;

	o = InterlockedCompareExchange64(p, s, c);
	return (o != c);
}

JEMALLOC_INLINE void
atomic_write_uint64(uint64_t *p, uint64_t x)
{

	InterlockedExchange64(p, x);
}
#  elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \
    defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{

	return (__sync_add_and_fetch(p, x));
}

JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{

	return (__sync_sub_and_fetch(p, x));
}

JEMALLOC_INLINE bool
atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
{

	return (!__sync_bool_compare_and_swap(p, c, s));
}

JEMALLOC_INLINE void
atomic_write_uint64(uint64_t *p, uint64_t x)
{

	__sync_lock_test_and_set(p, x);
}
#  else
#    error "Missing implementation for 64-bit atomic operations"
#  endif
#endif

/******************************************************************************/
/* 32-bit operations. */
#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
	uint32_t t = x;

	asm volatile (
	    "lock; xaddl %0, %1;"
	    : "+r" (t), "=m" (*p) /* Outputs. */
	    : "m" (*p) /* Inputs. */
	    );

	return (t + x);
}

JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
	uint32_t t;

	x = (uint32_t)(-(int32_t)x);
	t = x;
	asm volatile (
	    "lock; xaddl %0, %1;"
	    : "+r" (t), "=m" (*p) /* Outputs. */
	    : "m" (*p) /* Inputs. */
	    );

	return (t + x);
}

JEMALLOC_INLINE bool
atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
{
	uint8_t success;

	asm volatile (
	    "lock; cmpxchgl %4, %0;"
	    "sete %1;"
	    : "=m" (*p), "=a" (success) /* Outputs. */
	    : "m" (*p), "a" (c), "r" (s) /* Inputs. */
	    : "memory" /* Clobbers. */
	    );

	return (!(bool)success);
}

JEMALLOC_INLINE void
atomic_write_uint32(uint32_t *p, uint32_t x)
{

	asm volatile (
	    "xchgl %1, %0;" /* Lock is implied by xchgl. */
	    : "=m" (*p), "+r" (x) /* Outputs. */
	    : "m" (*p) /* Inputs. */
	    : "memory" /* Clobbers. */
	    );
}
#elif (defined(JEMALLOC_C11ATOMICS))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
	volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
	return (atomic_fetch_add(a, x) + x);
}

JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
	volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
	return (atomic_fetch_sub(a, x) - x);
}

JEMALLOC_INLINE bool
atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
{
	/* C11 requires an _Atomic object; cast as the add/sub variants do. */
	volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
	return (!atomic_compare_exchange_strong(a, &c, s));
}

JEMALLOC_INLINE void
atomic_write_uint32(uint32_t *p, uint32_t x)
{
	volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
	atomic_store(a, x);
}
#elif (defined(JEMALLOC_ATOMIC9))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{

	return (atomic_fetchadd_32(p, x) + x);
}

JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{

	return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x);
}

JEMALLOC_INLINE bool
atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
{

	return (!atomic_cmpset_32(p, c, s));
}

JEMALLOC_INLINE void
atomic_write_uint32(uint32_t *p, uint32_t x)
{

	atomic_store_rel_32(p, x);
}
#elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{

	return (OSAtomicAdd32((int32_t)x, (int32_t *)p));
}

JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{

	return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
}

JEMALLOC_INLINE bool
atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
{

	return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p));
}

JEMALLOC_INLINE void
atomic_write_uint32(uint32_t *p, uint32_t x)
{
	uint32_t o;

	/* The documented OSAtomic*() API does not expose an atomic exchange. */
	do {
		o = atomic_read_uint32(p);
	} while (atomic_cas_uint32(p, o, x));
}
#elif (defined(_MSC_VER))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{

	return (InterlockedExchangeAdd(p, x) + x);
}

JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{

	return (InterlockedExchangeAdd(p, -((int32_t)x)) - x);
}

JEMALLOC_INLINE bool
atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
{
	uint32_t o;

	o = InterlockedCompareExchange(p, s, c);
	return (o != c);
}

JEMALLOC_INLINE void
atomic_write_uint32(uint32_t *p, uint32_t x)
{

	InterlockedExchange(p, x);
}
#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \
    defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{

	return (__sync_add_and_fetch(p, x));
}

JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{

	return (__sync_sub_and_fetch(p, x));
}

JEMALLOC_INLINE bool
atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
{

	return (!__sync_bool_compare_and_swap(p, c, s));
}

JEMALLOC_INLINE void
atomic_write_uint32(uint32_t *p, uint32_t x)
{

	__sync_lock_test_and_set(p, x);
}
#else
#  error "Missing implementation for 32-bit atomic operations"
#endif

/******************************************************************************/
/* Pointer operations. */
JEMALLOC_INLINE void *
atomic_add_p(void **p, void *x)
{

#if (LG_SIZEOF_PTR == 3)
	return ((void *)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
#elif (LG_SIZEOF_PTR == 2)
	return ((void *)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
#endif
}

JEMALLOC_INLINE void *
atomic_sub_p(void **p, void *x)
{

#if (LG_SIZEOF_PTR == 3)
	return ((void *)atomic_add_uint64((uint64_t *)p,
	    (uint64_t)-((int64_t)x)));
#elif (LG_SIZEOF_PTR == 2)
	return ((void *)atomic_add_uint32((uint32_t *)p,
	    (uint32_t)-((int32_t)x)));
#endif
}

JEMALLOC_INLINE bool
atomic_cas_p(void **p, void *c, void *s)
{

#if (LG_SIZEOF_PTR == 3)
	return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s));
#elif (LG_SIZEOF_PTR == 2)
	return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s));
#endif
}

JEMALLOC_INLINE void
atomic_write_p(void **p, void *x)
{

#if (LG_SIZEOF_PTR == 3)
	atomic_write_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 2)
	atomic_write_uint32((uint32_t *)p, (uint32_t)x);
#endif
}

/******************************************************************************/
/* size_t operations. */
JEMALLOC_INLINE size_t
atomic_add_z(size_t *p, size_t x)
{

#if (LG_SIZEOF_PTR == 3)
	return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
#elif (LG_SIZEOF_PTR == 2)
	return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
#endif
}

JEMALLOC_INLINE size_t
atomic_sub_z(size_t *p, size_t x)
{

#if (LG_SIZEOF_PTR == 3)
	return ((size_t)atomic_add_uint64((uint64_t *)p,
	    (uint64_t)-((int64_t)x)));
#elif (LG_SIZEOF_PTR == 2)
	return ((size_t)atomic_add_uint32((uint32_t *)p,
	    (uint32_t)-((int32_t)x)));
#endif
}

JEMALLOC_INLINE bool
atomic_cas_z(size_t *p, size_t c, size_t s)
{

#if (LG_SIZEOF_PTR == 3)
	return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s));
#elif (LG_SIZEOF_PTR == 2)
	return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s));
#endif
}

JEMALLOC_INLINE void
atomic_write_z(size_t *p, size_t x)
{

#if (LG_SIZEOF_PTR == 3)
	atomic_write_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 2)
	atomic_write_uint32((uint32_t *)p, (uint32_t)x);
#endif
}

/******************************************************************************/
/* unsigned operations. */
JEMALLOC_INLINE unsigned
atomic_add_u(unsigned *p, unsigned x)
{

#if (LG_SIZEOF_INT == 3)
	return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
#elif (LG_SIZEOF_INT == 2)
	return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
#endif
}

JEMALLOC_INLINE unsigned
atomic_sub_u(unsigned *p, unsigned x)
{

#if (LG_SIZEOF_INT == 3)
	return ((unsigned)atomic_add_uint64((uint64_t *)p,
	    (uint64_t)-((int64_t)x)));
#elif (LG_SIZEOF_INT == 2)
	return ((unsigned)atomic_add_uint32((uint32_t *)p,
	    (uint32_t)-((int32_t)x)));
#endif
}

JEMALLOC_INLINE bool
atomic_cas_u(unsigned *p, unsigned c, unsigned s)
{

#if (LG_SIZEOF_INT == 3)
	return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s));
#elif (LG_SIZEOF_INT == 2)
	return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s));
#endif
}

JEMALLOC_INLINE void
atomic_write_u(unsigned *p, unsigned x)
{

#if (LG_SIZEOF_INT == 3)
	atomic_write_uint64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_INT == 2)
	atomic_write_uint32((uint32_t *)p, (uint32_t)x);
#endif
}

/******************************************************************************/
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/