/* Copyright (c) 2006, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * Author: Sanjay Ghemawat
 */

// Implementation of atomic operations for x86.  This file should not
// be included directly.  Clients should instead include
// "base/atomicops.h".

#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_
#define BASE_ATOMICOPS_INTERNALS_X86_H_

typedef int32_t Atomic32;
#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*


// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it
// already matches Atomic32 or Atomic64, depending on the platform.


// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86.  Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence
                            // after acquire compare-and-swap.
  bool has_sse2;            // Processor has SSE2.
  bool has_cmpxchg16b;      // Processor supports cmpxchg16b instruction.
};
// Defined once in the corresponding .cc file; filled in at startup
// (conservative values until then, per the comment above).
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;


// Compiler-only barrier: forbids the compiler from moving memory accesses
// across this point.  Emits no machine instruction, so it does not order
// accesses at the hardware level.
#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")


namespace base {
namespace subtle {

typedef int64_t Atomic64;

// 32-bit low-level operations on any platform.

// Atomically: if (*ptr == old_value) *ptr = new_value.  Returns the value
// of *ptr observed by the cmpxchg (equal to old_value iff the swap
// happened).  "NoBarrier" means no ordering guarantee is promised beyond
// what the locked instruction itself provides.
inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)               // cmpxchg leaves old *ptr in eax
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

// Atomically stores new_value into *ptr and returns the value previously
// held there.
inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

// Atomically adds increment to *ptr and returns the new (post-increment)
// value.
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

// Same as NoBarrier_AtomicIncrement, plus an "lfence" on processors with
// the AMD lock/mb erratum so the locked op is a reliable barrier there.
inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

// Compare-and-swap with acquire semantics.  The extra lfence is the
// workaround for the AMD memory-barrier bug noted in
// AtomicOps_x86CPUFeatureStruct.
inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

// Compare-and-swap with release semantics.  On x86 the locked cmpxchg in
// NoBarrier_CompareAndSwap already provides the required ordering, so no
// extra fence is issued.
inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

// Plain store with no ordering guarantees.
inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
// Full hardware memory barrier.  On x86-64, SSE2 (and hence mfence) is
// always present.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

// Store followed by a full barrier.
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

// Full hardware memory barrier for 32-bit x86: mfence when SSE2 is
// available, otherwise a locked exchange on a dummy stack location.
inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

// Store followed by a barrier; on pre-SSE2 chips the locked xchg both
// performs the store and serves as the barrier.
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);
                          // acts as a barrier on PIII
  }
}
#endif

// Store with release semantics: a compiler barrier keeps earlier accesses
// from being moved past the store; the hardware side is free because an
// x86 store acts as a release barrier.
inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

// Plain load with no ordering guarantees.
inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

// Load with acquire semantics: the compiler barrier keeps later accesses
// from being moved before the load; an x86 load provides the hardware
// acquire ordering.
inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

// Load preceded by a full hardware barrier.
inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

// Atomically: if (*ptr == old_value) *ptr = new_value.  Returns the value
// of *ptr observed by the cmpxchg (equal to old_value iff the swap
// happened).  64-bit twin of the Atomic32 version above.
inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

// Atomically stores new_value into *ptr and returns the previous value.
inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

// Atomically adds increment to *ptr and returns the new value.
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

// Same as NoBarrier_AtomicIncrement, plus the lfence workaround for the
// AMD lock/mb erratum.
inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

// Plain store with no ordering guarantees.
inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

// Store followed by a full barrier.
inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

// Store with release semantics; see the rationale in the comment body.
inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value; // An x86 store acts as a release barrier
                // for current AMD/Intel chips as of Jan 2008.
                // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

// Plain load with no ordering guarantees.
inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

// Load with acquire semantics: compiler barrier after the load; hardware
// side is free on x86.
inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr; // An x86 load acts as an acquire barrier,
                         // for current AMD/Intel chips as of Jan 2008.
                         // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

// Load preceded by a full hardware barrier.
inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

#else  // defined(__x86_64__)

// 64-bit low-level operations on 32-bit platform.

#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
// For compilers older than gcc 4.1, we use inline asm.
//
// Potential pitfalls:
//
// 1. %ebx points to Global offset table (GOT) with -fPIC.
//    We need to preserve this register.
// 2. When explicit registers are used in inline asm, the
//    compiler may not be aware of it and might try to reuse
//    the same register for another argument which has constraints
//    that allow it ("r" for example).

// Hand-rolled stand-in for the gcc >= 4.1 __sync_val_compare_and_swap
// builtin: a 64-bit CAS on a 32-bit CPU via cmpxchg8b.  %ebx is pushed and
// popped around the instruction to stay PIC-safe (see pitfall 1 above);
// new_value is passed by address and loaded into ecx:ebx inside the asm.
inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr,
                                            Atomic64 old_value,
                                            Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("push %%ebx\n\t"
                       "movl (%3), %%ebx\n\t"    // Move 64-bit new_value into
                       "movl 4(%3), %%ecx\n\t"   // ecx:ebx
                       "lock; cmpxchg8b (%1)\n\t"// If edx:eax (old_value) same
                       "pop %%ebx\n\t"
                       : "=A" (prev)             // as contents of ptr:
                       : "D" (ptr),              //   ecx:ebx => ptr
                         "0" (old_value),        // else:
                         "S" (&new_value)        //   old *ptr => edx:eax
                       : "memory", "%ecx");
  return prev;
}
#endif  // Compiler < gcc-4.1

// 64-bit CAS, delegating to the builtin (or the asm stand-in above).
inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_val,
                                         Atomic64 new_val) {
  return __sync_val_compare_and_swap(ptr, old_val, new_val);
}

// 64-bit exchange emulated with a CAS retry loop; returns the old value.
inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_val) {
  Atomic64 old_val;

  do {
    old_val = *ptr;
  } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);

  return old_val;
}

// 64-bit increment emulated with a CAS retry loop; returns the new value.
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 old_val, new_val;

  do {
    old_val = *ptr;
    new_val = old_val + increment;
  } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);

  return old_val + increment;
}

// Increment plus the lfence workaround for the AMD lock/mb erratum.
inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return new_val;
}

// 64-bit store on a 32-bit CPU done as a single 8-byte MMX move, so the
// store cannot tear.  emms restores the FP state afterwards.
inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
                       "emms\n\t"            // Empty mmx state/Reset FP regs
                       : "=m" (*ptr)
                       : "m" (value)
                       : // mark the FP stack and mmx registers as clobbered
                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
}

// Store followed by a full barrier.
inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_Store(ptr, value);
  MemoryBarrier();
}

// Store with release semantics (compiler barrier before the store; the
// x86 store itself provides the hardware ordering — see the 64-bit
// Release_Store comment in the x86-64 branch).
inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();
  NoBarrier_Store(ptr, value);
}

// 64-bit load on a 32-bit CPU done as a single 8-byte MMX move, so the
// load cannot tear.
inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  Atomic64 value;
  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
                       "emms\n\t"            // Empty mmx state/Reset FP regs
                       : "=m" (value)
                       : "m" (*ptr)
                       : // mark the FP stack and mmx registers as clobbered
                         "st", "st(1)", "st(2)", "st(3)", "st(4)",
                         "st(5)", "st(6)", "st(7)", "mm0", "mm1",
                         "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
  return value;
}

// Load with acquire semantics: compiler barrier after the load.
inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = NoBarrier_Load(ptr);
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

// Load preceded by a full hardware barrier.
inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return NoBarrier_Load(ptr);
}

#endif  // defined(__x86_64__)

// 64-bit CAS with acquire semantics; the lfence is the AMD lock/mb
// erratum workaround, as in the Atomic32 version.
inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

// 64-bit CAS with release semantics; the locked cmpxchg already provides
// the required ordering on x86, so this is the plain CAS.
inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

}  // namespace base::subtle
}  // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_H_