// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file is an internal atomic implementation, use atomicops.h instead.

#ifndef V8_ATOMICOPS_INTERNALS_X86_GCC_H_
#define V8_ATOMICOPS_INTERNALS_X86_GCC_H_

namespace v8 {
namespace internal {

// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86.  Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug;  // Processor has AMD memory-barrier bug; do lfence
                             // after acquire compare-and-swap.
  bool has_sse2;             // Processor has SSE2.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;

#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}
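
// Illustrative usage sketch, not part of the original header: a typical
// compare-and-swap retry loop built on the primitive above.  The name
// Example_AtomicStoreMax is hypothetical; it raises *ptr to |new_value| if
// |new_value| is larger, retrying whenever another thread wins the race.
inline Atomic32 Example_AtomicStoreMax(volatile Atomic32* ptr,
                                       Atomic32 new_value) {
  Atomic32 old_value = *ptr;
  while (old_value < new_value) {
    Atomic32 prev = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
    if (prev == old_value) break;  // Our value was installed.
    old_value = prev;              // Lost the race; retry with the fresh value.
  }
  return old_value;  // Value observed before the final (or skipped) swap.
}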

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}
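
// Illustrative usage sketch, not part of the original header: a reference
// count built on the increment primitives above.  The Example_RefCount* names
// are hypothetical, and a real reference count would also have to manage
// object destruction; the decrement uses Barrier_AtomicIncrement so it is not
// reordered with surrounding memory operations on the last-release path.
inline void Example_RefCountIncrement(volatile Atomic32* count) {
  NoBarrier_AtomicIncrement(count, 1);
}
inline bool Example_RefCountDecrementReachedZero(volatile Atomic32* count) {
  return Barrier_AtomicIncrement(count, -1) == 0;
}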

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);  // acts as a barrier on PIII
  }
}
#endif

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}
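
// Illustrative usage sketch, not part of the original header: a minimal
// spinlock built from Acquire_CompareAndSwap() and Release_Store() above.
// Example_SpinLock/Example_SpinUnlock are hypothetical names; a real lock
// would also want a pause instruction and back-off in the spin loop.
inline void Example_SpinLock(volatile Atomic32* lock_word) {
  // 0 means unlocked, 1 means locked.
  while (Acquire_CompareAndSwap(lock_word, 0, 1) != 0) {
    // Spin until the previous holder releases the lock with Release_Store().
  }
}
inline void Example_SpinUnlock(volatile Atomic32* lock_word) {
  Release_Store(lock_word, 0);
}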

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}
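
// Illustrative usage sketch, not part of the original header: the usual
// pairing of Release_Store() with Acquire_Load() to publish a value between
// threads.  Example_Publish/Example_TryConsume and the |payload| and
// |ready_flag| parameters are hypothetical.
inline void Example_Publish(volatile Atomic32* payload,
                            volatile Atomic32* ready_flag,
                            Atomic32 value) {
  NoBarrier_Store(payload, value);  // Write the data first...
  Release_Store(ready_flag, 1);     // ...then publish with release semantics.
}
inline bool Example_TryConsume(volatile Atomic32* payload,
                               volatile const Atomic32* ready_flag,
                               Atomic32* result) {
  if (Acquire_Load(ready_flag) == 0) return false;  // Not published yet.
  *result = NoBarrier_Load(payload);  // Ordered after the acquire load.
  return true;
}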

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}
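
// Illustrative usage sketch, not part of the original header: a 64-bit
// monotonic counter (e.g. for sequence numbers) built on the increment
// primitive above.  Example_NextSequenceNumber is a hypothetical name.
inline Atomic64 Example_NextSequenceNumber(volatile Atomic64* counter) {
  return NoBarrier_AtomicIncrement(counter, 1);
}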

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

#endif  // defined(__x86_64__)

} }  // namespace v8::internal

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // V8_ATOMICOPS_INTERNALS_X86_GCC_H_