// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation, use base/atomicops.h instead.

#ifndef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
#define BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
#pragma once

// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86. Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug;  // Processor has AMD memory-barrier bug; do lfence
                             // after acquire compare-and-swap.
  bool has_sse2;             // Processor has SSE2.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;

// Compiler-only barrier: stops the compiler from reordering memory accesses
// across this point.  Emits no machine instructions.
#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")

namespace base {
namespace subtle {

// 32-bit low-level operations on any platform.

// Atomically: if *ptr == old_value, store new_value; in all cases return the
// value *ptr held before the operation.  No ordering guarantees beyond those
// of the locked instruction itself.
inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  // cmpxchg compares EAX (tied to old_value via the "0" constraint) with
  // *ptr; on a match it stores new_value.  EAX ("=a") receives the prior
  // value of *ptr either way.
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

// Atomically store new_value into *ptr and return the value it replaced.
inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

// Atomically add increment to *ptr and return the new (post-increment) value.
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  // xadd exchanges temp with *ptr and then stores the sum into *ptr.
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

// Like NoBarrier_AtomicIncrement, plus a trailing lfence on processors with
// the AMD lock/memory-barrier bug (see AtomicOps_x86CPUFeatureStruct).
inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

// Compare-and-swap with acquire semantics: an lfence is issued after the
// swap on processors with the AMD memory-barrier bug.
inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

// Compare-and-swap with release semantics.  No extra barrier is emitted
// here; the locked cmpxchg alone suffices for the release side on x86.
inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

// Plain store with no ordering guarantees beyond a normal x86 store.
inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  // SSE2 (and therefore mfence) is architecturally guaranteed on x86-64.
  __asm__ __volatile__("mfence" : : : "memory");
}

// Store value into *ptr, then issue a full hardware barrier.
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

// Store value into *ptr followed by a barrier; falls back to a locked
// exchange (which is itself a store plus barrier) when mfence is absent.
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);
                          // acts as a barrier on PIII
  }
}
#endif

// Release store: a compiler barrier keeps earlier accesses from being
// reordered past the store; the hardware store itself provides release
// ordering on x86.
inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

// Plain load with no ordering guarantees beyond a normal x86 load.
inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

// Acquire load: the compiler barrier after the load keeps later accesses
// from being hoisted above it; the hardware load provides acquire ordering.
inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

// Full barrier, then load.
inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

// Atomically: if *ptr == old_value, store new_value; in all cases return the
// value *ptr held before the operation.  64-bit analogue of the Atomic32
// version above.
inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  // RAX is tied to old_value on input ("0") and receives the prior value of
  // *ptr on output ("=a").
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

// Atomically store new_value into *ptr and return the value it replaced.
inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

// Atomically add increment to *ptr and return the new (post-increment) value.
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

// Like NoBarrier_AtomicIncrement, plus a trailing lfence on processors with
// the AMD lock/memory-barrier bug.
inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

// Plain store with no ordering guarantees beyond a normal x86 store.
inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

// Store value into *ptr, then issue a full hardware barrier.
inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

// Release store: compiler barrier, then a plain store, which is sufficient
// for release ordering on x86 hardware (rationale below).
inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

// Plain load with no ordering guarantees beyond a normal x86 load.
inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

// Acquire load: the compiler barrier after the load keeps later accesses
// from being hoisted above it; the hardware load provides acquire ordering.
inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

// Full barrier, then load.
inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

// Compare-and-swap with acquire semantics: an lfence is issued after the
// swap on processors with the AMD memory-barrier bug.
inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

// Compare-and-swap with release semantics; the locked cmpxchg alone
// suffices for the release side on x86.
inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

#endif  // defined(__x86_64__)

}  // namespace base::subtle
}  // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_GCC_H_