// Protocol Buffers - Google's data interchange format
// Copyright 2012 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file is an internal atomic implementation, use atomicops.h instead.

#ifndef GOOGLE_PROTOBUF_ATOMICOPS_INTERNALS_X86_GCC_H_
#define GOOGLE_PROTOBUF_ATOMICOPS_INTERNALS_X86_GCC_H_

namespace google {
namespace protobuf {
namespace internal {

// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86 CPU.  Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug;  // Processor has AMD memory-barrier bug; do lfence
                             // after acquire compare-and-swap.
  bool has_sse2;             // Processor has SSE2.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;

#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}
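// Illustrative sketch (not part of this header): callers typically drive
// NoBarrier_CompareAndSwap in a retry loop, re-reading the current value
// whenever the CAS loses a race.  The helper name AtomicStoreMax below is
// hypothetical and shown only as an example of that pattern:
//
//   inline void AtomicStoreMax(volatile Atomic32* ptr, Atomic32 candidate) {
//     Atomic32 observed = *ptr;
//     while (candidate > observed) {
//       Atomic32 prev = NoBarrier_CompareAndSwap(ptr, observed, candidate);
//       if (prev == observed) break;  // CAS succeeded; max is installed.
//       observed = prev;              // Lost a race; retry with fresh value.
//     }
//   }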

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}
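// Illustrative sketch (not part of this header): both increment variants
// return the *new* value (old value + increment), which maps directly onto
// reference counting.  The helper names RefCountInc/RefCountDec below are
// hypothetical; the decrement uses Barrier_AtomicIncrement because releasing
// the last reference usually needs barrier semantics:
//
//   inline void RefCountInc(volatile Atomic32* refcount) {
//     NoBarrier_AtomicIncrement(refcount, 1);
//   }
//   inline bool RefCountDec(volatile Atomic32* refcount) {
//     return Barrier_AtomicIncrement(refcount, -1) == 0;  // true on last ref.
//   }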

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}
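// Illustrative sketch (not part of this header): the acquire/release CAS pair
// maps naturally onto a test-and-set spinlock.  The helper names TryLock and
// Unlock are hypothetical; Release_Store used in Unlock is defined further
// below in this file.
//
//   inline bool TryLock(volatile Atomic32* lock_word) {
//     // 0 == unlocked, 1 == locked.  Acquire semantics keep the critical
//     // section from being hoisted above the lock acquisition.
//     return Acquire_CompareAndSwap(lock_word, 0, 1) == 0;
//   }
//   inline void Unlock(volatile Atomic32* lock_word) {
//     // Release semantics keep the critical section from sinking below.
//     Release_Store(lock_word, 0);
//   }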

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);  // acts as a barrier on PIII
  }
}
#endif
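// Illustrative sketch (not part of this header): a plain store followed by a
// plain load of a different location may be reordered by the processor, so
// Dekker-style "set my flag, then check the other flag" handshakes need the
// full MemoryBarrier() above.  The names flag_a/flag_b/ThreadAEnter are
// hypothetical:
//
//   volatile Atomic32 flag_a = 0;
//   volatile Atomic32 flag_b = 0;
//   inline bool ThreadAEnter() {
//     NoBarrier_Store(&flag_a, 1);
//     MemoryBarrier();      // Order the flag_a store before the flag_b load.
//     return flag_b == 0;   // Only enter if thread B has not set its flag.
//   }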

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
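// Illustrative sketch (not part of this header): Release_Store pairs with
// Acquire_Load to publish data from one thread to another; the writer fills
// in a payload and then releases a flag, and a reader that observes the flag
// with acquire semantics also sees the payload.  The names Publish/TryConsume
// and the payload variable are hypothetical:
//
//   Atomic32 payload = 0;            // Written before the flag is released.
//   volatile Atomic32 ready = 0;
//   inline void Publish(Atomic32 value) {
//     payload = value;
//     Release_Store(&ready, 1);      // Payload write cannot sink below this.
//   }
//   inline bool TryConsume(Atomic32* out) {
//     if (Acquire_Load(&ready) == 0) return false;
//     *out = payload;                // Ordered after the acquire flag load.
//     return true;
//   }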

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

#endif  // defined(__x86_64__)

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // GOOGLE_PROTOBUF_ATOMICOPS_INTERNALS_X86_GCC_H_