// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation, use base/atomicops.h instead.

#ifndef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
#define BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
#pragma once

// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86 CPU.  Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence
                            // after acquire compare-and-swap.
  bool has_sse2;            // Processor has SSE2.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
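// (The flags above are expected to be filled in by a static initializer in
// the corresponding .cc file for this port, which queries cpuid; that is why
// the values may be conservative before main() runs.)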

#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
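// The barrier above emits no machine instructions; the "memory" clobber only
// tells the compiler not to reorder or cache memory accesses across the asm
// statement.  Hardware ordering still relies on the lock-prefixed
// instructions and fences used below.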

namespace base {
namespace subtle {

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}
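// Illustrative sketch only (not part of this header; |counter| is a
// hypothetical Atomic32): callers typically use the return value to build a
// retry loop, since a successful swap returns |old_value|:
//
//   Atomic32 old_val, new_val;
//   do {
//     old_val = NoBarrier_Load(&counter);
//     new_val = old_val + 1;
//   } while (NoBarrier_CompareAndSwap(&counter, old_val, new_val) != old_val);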

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

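// Atomically adds |increment| to |*ptr| and returns the new (incremented)
// value.  Note that the return value is the new value, not the old one, even
// though xadd itself yields the old value.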
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}
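// Illustrative sketch only (|ref_count_| and the surrounding class are
// hypothetical): the barrier variant is what a reference count wants, so that
// the decrement that releases the last reference is not reordered with
// earlier accesses to the object:
//
//   if (Barrier_AtomicIncrement(&ref_count_, -1) == 0)
//     delete this;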

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

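// A plain assignment suffices here: aligned 32-bit loads and stores are
// already atomic on x86.  NoBarrier_Store/NoBarrier_Load only guarantee
// atomicity, not any ordering with respect to other memory accesses.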
inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because "mfence"
// is guaranteed to exist on all x86-64 processors.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else { // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);  // acts as a barrier on PIII
  }
}
#endif

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value; // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr; // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
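// Illustrative sketch only (|data|, |ready| and use() are hypothetical):
// Release_Store is meant to be paired with Acquire_Load on another thread,
// so that everything written before the store is visible after the load:
//
//   // Producer thread:
//   data = 42;
//   Release_Store(&ready, 1);
//
//   // Consumer thread:
//   if (Acquire_Load(&ready))
//     use(data);  // observes data == 42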

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.
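// These Atomic64 overloads are only provided in 64-bit mode because they rely
// on the 64-bit forms of the instructions (cmpxchgq, xchgq, xaddq), which are
// not available to 32-bit code.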

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value; // An x86 store acts as a release barrier
                // for current AMD/Intel chips as of Jan 2008.
                // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr; // An x86 load acts as an acquire barrier,
                         // for current AMD/Intel chips as of Jan 2008.
                         // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

#endif  // defined(__x86_64__)

} // namespace base::subtle
} // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_GCC_H_