/* Copyright (c) 2006, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * Author: Sanjay Ghemawat
 */

// Implementation of atomic operations using Windows API
// functions.  This file should not be included directly.  Clients
// should instead include "base/atomicops.h".

#ifndef BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
#define BASE_ATOMICOPS_INTERNALS_WINDOWS_H_

#include <stdio.h>
#include <stdlib.h>
#include "base/basictypes.h"  // For COMPILE_ASSERT

typedef int32 Atomic32;

#if defined(_WIN64)
#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
#endif

namespace base {
namespace subtle {

typedef int64 Atomic64;

// 32-bit low-level operations on any platform

extern "C" {
// We use Windows intrinsics when we can (they seem to be supported
// well on MSVC 8.0 and above).  Unfortunately, in some
// environments, <windows.h> and <intrin.h> have conflicting
// declarations of some other intrinsics, breaking compilation:
//   http://connect.microsoft.com/VisualStudio/feedback/details/262047
// Therefore, we simply declare the relevant intrinsics ourselves.

// MinGW has a bug in its header files where it doesn't indicate the
// first argument is volatile -- they're not up to date.  See
//   http://readlist.com/lists/lists.sourceforge.net/mingw-users/0/3861.html
// We have to const_cast away the volatile to avoid compiler warnings.
// TODO(csilvers): remove this once MinGW has updated MinGW/include/winbase.h
#if defined(__MINGW32__)
inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
                                           LONG newval, LONG oldval) {
  return ::InterlockedCompareExchange(const_cast<LONG*>(ptr), newval, oldval);
}

inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
  return ::InterlockedExchange(const_cast<LONG*>(ptr), newval);
}

inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
  return ::InterlockedExchangeAdd(const_cast<LONG*>(ptr), increment);
}

#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
// Unfortunately, in some environments, <windows.h> and <intrin.h>
// have conflicting declarations of some intrinsics, breaking
// compilation.  So we declare the intrinsics we need ourselves.  See
//   http://connect.microsoft.com/VisualStudio/feedback/details/262047
LONG _InterlockedCompareExchange(volatile LONG* ptr, LONG newval, LONG oldval);
#pragma intrinsic(_InterlockedCompareExchange)
inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
                                           LONG newval, LONG oldval) {
  return _InterlockedCompareExchange(ptr, newval, oldval);
}

LONG _InterlockedExchange(volatile LONG* ptr, LONG newval);
#pragma intrinsic(_InterlockedExchange)
inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
  return _InterlockedExchange(ptr, newval);
}

LONG _InterlockedExchangeAdd(volatile LONG* ptr, LONG increment);
#pragma intrinsic(_InterlockedExchangeAdd)
inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
  return _InterlockedExchangeAdd(ptr, increment);
}

#else
inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
                                           LONG newval, LONG oldval) {
  return ::InterlockedCompareExchange(ptr, newval, oldval);
}

inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
  return ::InterlockedExchange(ptr, newval);
}

inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
  return ::InterlockedExchangeAdd(ptr, increment);
}

#endif  // ifdef __MINGW32__
}  // extern "C"

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  LONG result = FastInterlockedCompareExchange(
      reinterpret_cast<volatile LONG*>(ptr),
      static_cast<LONG>(new_value),
      static_cast<LONG>(old_value));
  return static_cast<Atomic32>(result);
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  LONG result = FastInterlockedExchange(
      reinterpret_cast<volatile LONG*>(ptr),
      static_cast<LONG>(new_value));
  return static_cast<Atomic32>(result);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  return FastInterlockedExchangeAdd(
      reinterpret_cast<volatile LONG*>(ptr),
      static_cast<LONG>(increment)) + increment;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

}  // namespace base::subtle
}  // namespace base
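
// Usage sketch (illustrative only, not part of this header's interface):
// NoBarrier_CompareAndSwap() returns the value it observed at *ptr, which
// is exactly what a compare-and-swap retry loop needs.  A hypothetical
// "store maximum" helper built on the 32-bit primitive might look like:
//
//   void AtomicStoreMax(volatile Atomic32* ptr, Atomic32 candidate) {
//     Atomic32 observed = *ptr;
//     while (candidate > observed) {
//       Atomic32 prev = base::subtle::NoBarrier_CompareAndSwap(
//           ptr, observed, candidate);
//       if (prev == observed) return;  // our value was installed
//       observed = prev;               // lost a race; re-check and retry
//     }
//   }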

// In msvc8/vs2005, winnt.h already contains a definition for
// MemoryBarrier in the global namespace.  Add it there for earlier
// versions and forward to it from within the namespace.
#if !(defined(_MSC_VER) && _MSC_VER >= 1400)
inline void MemoryBarrier() {
  Atomic32 value = 0;
  base::subtle::NoBarrier_AtomicExchange(&value, 0);
  // actually acts as a barrier in this implementation
}
#endif

namespace base {
namespace subtle {

inline void MemoryBarrier() {
  ::MemoryBarrier();
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  NoBarrier_AtomicExchange(ptr, value);
  // acts as a barrier in this implementation
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;  // works w/o barrier for current Intel chips as of June 2005
  // See comments in Atomic64 version of Release_Store() below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
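
// Usage sketch (illustrative only, not part of this header's interface):
// the Release_Store()/Acquire_Load() pair above is the usual way to
// publish data from one thread to another.  With hypothetical globals
// g_payload and g_ready:
//
//   int g_payload = 0;
//   Atomic32 g_ready = 0;
//
//   // Publisher thread:
//   g_payload = 42;                            // write the data first
//   base::subtle::Release_Store(&g_ready, 1);  // then publish the flag
//
//   // Consumer thread:
//   if (base::subtle::Acquire_Load(&g_ready) == 1) {
//     int value = g_payload;  // sees 42: the acquire load orders the reads
//   }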

// 64-bit operations

#if defined(_WIN64) || defined(__MINGW64__)

// 64-bit low-level operations on 64-bit platform.

COMPILE_ASSERT(sizeof(Atomic64) == sizeof(PVOID), atomic_word_is_atomic);

// These are the intrinsics needed for 64-bit operations.  Similar to the
// 32-bit case above.

extern "C" {
#if defined(__MINGW64__)
inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
                                                   PVOID newval, PVOID oldval) {
  return ::InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
                                             newval, oldval);
}

inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
  return ::InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
}

inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
                                             LONGLONG increment) {
  return ::InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
}

#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
// Like above, we need to declare the intrinsics ourselves.
PVOID _InterlockedCompareExchangePointer(volatile PVOID* ptr,
                                         PVOID newval, PVOID oldval);
#pragma intrinsic(_InterlockedCompareExchangePointer)
inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
                                                   PVOID newval, PVOID oldval) {
  return _InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
                                            newval, oldval);
}

PVOID _InterlockedExchangePointer(volatile PVOID* ptr, PVOID newval);
#pragma intrinsic(_InterlockedExchangePointer)
inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
  return _InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
}

LONGLONG _InterlockedExchangeAdd64(volatile LONGLONG* ptr, LONGLONG increment);
#pragma intrinsic(_InterlockedExchangeAdd64)
inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
                                             LONGLONG increment) {
  return _InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
}

#else
inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
                                                   PVOID newval, PVOID oldval) {
  return ::InterlockedCompareExchangePointer(ptr, newval, oldval);
}

inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
  return ::InterlockedExchangePointer(ptr, newval);
}

inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
                                             LONGLONG increment) {
  return ::InterlockedExchangeAdd64(ptr, increment);
}

#endif  // ifdef __MINGW64__
}  // extern "C"

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  PVOID result = FastInterlockedCompareExchangePointer(
      reinterpret_cast<volatile PVOID*>(ptr),
      reinterpret_cast<PVOID>(new_value), reinterpret_cast<PVOID>(old_value));
  return reinterpret_cast<Atomic64>(result);
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  PVOID result = FastInterlockedExchangePointer(
      reinterpret_cast<volatile PVOID*>(ptr),
      reinterpret_cast<PVOID>(new_value));
  return reinterpret_cast<Atomic64>(result);
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  return FastInterlockedExchangeAdd64(
      reinterpret_cast<volatile LONGLONG*>(ptr),
      static_cast<LONGLONG>(increment)) + increment;
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}
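
// Usage sketch (illustrative only, not part of this header's interface):
// Barrier_AtomicIncrement() returns the value of *ptr *after* the
// addition, which makes it a natural fit for reference counting.  With a
// hypothetical Atomic64 counter g_refcount:
//
//   void Unref() {
//     if (base::subtle::Barrier_AtomicIncrement(&g_refcount, -1) == 0) {
//       DestroyObject();  // hypothetical cleanup; last reference dropped
//     }
//   }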

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_AtomicExchange(ptr, value);
  // acts as a barrier in this implementation
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;  // works w/o barrier for current Intel chips as of June 2005

  // When new chips come out, check:
  //   IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //   System Programming Guide, Chapter 7: Multiple-processor management,
  //   Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

#else  // defined(_WIN64) || defined(__MINGW64__)

// 64-bit low-level operations on 32-bit platform

// TODO(vchen): The GNU assembly below must be converted to MSVC inline
// assembly.  Then the file should be renamed to ...-x86-msvc.h, probably.

inline void NotImplementedFatalError(const char *function_name) {
  fprintf(stderr, "64-bit %s() not implemented on this platform\n",
          function_name);
  abort();
}

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
#if 0  // Not implemented
  Atomic64 prev;
  __asm__ __volatile__("movl (%3), %%ebx\n\t"    // Move 64-bit new_value into
                       "movl 4(%3), %%ecx\n\t"   // ecx:ebx
                       "lock; cmpxchg8b %1\n\t"  // If edx:eax (old_value) same
                       : "=A" (prev)             // as contents of ptr:
                       : "m" (*ptr),             //   ecx:ebx => ptr
                         "0" (old_value),        // else:
                         "r" (&new_value)        //   old *ptr => edx:eax
                       : "memory", "%ebx", "%ecx");
  return prev;
#else
  NotImplementedFatalError("NoBarrier_CompareAndSwap");
  return 0;
#endif
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
#if 0  // Not implemented
  __asm__ __volatile__(
      "movl (%2), %%ebx\n\t"    // Move 64-bit new_value into
      "movl 4(%2), %%ecx\n\t"   // ecx:ebx
      "0:\n\t"
      "movl %1, %%eax\n\t"      // Read contents of ptr into
      "movl 4%1, %%edx\n\t"     // edx:eax
      "lock; cmpxchg8b %1\n\t"  // Attempt cmpxchg; if *ptr
      "jnz 0b\n\t"              // is no longer edx:eax, loop
      : "=A" (new_value)
      : "m" (*ptr),
        "r" (&new_value)
      : "memory", "%ebx", "%ecx");
  return new_value;  // Now it's the previous value.
#else
  NotImplementedFatalError("NoBarrier_AtomicExchange");
  return 0;
#endif
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
#if 0  // Not implemented
  Atomic64 temp = increment;
  __asm__ __volatile__(
      "0:\n\t"
      "movl (%3), %%ebx\n\t"      // Move 64-bit increment into
      "movl 4(%3), %%ecx\n\t"     // ecx:ebx
      "movl (%2), %%eax\n\t"      // Read contents of ptr into
      "movl 4(%2), %%edx\n\t"     // edx:eax
      "add %%eax, %%ebx\n\t"      // sum => ecx:ebx
      "adc %%edx, %%ecx\n\t"      // edx:eax still has old *ptr
      "lock; cmpxchg8b (%2)\n\t"  // Attempt cmpxchg; if *ptr
      "jnz 0b\n\t"                // is no longer edx:eax, loop
      : "=A" (temp), "+m" (*ptr)
      : "D" (ptr), "S" (&increment)
      : "memory", "%ebx", "%ecx");
  // temp now contains the previous value of *ptr
  return temp + increment;
#else
  NotImplementedFatalError("NoBarrier_AtomicIncrement");
  return 0;
#endif
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
#if 0  // Not implemented
  Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return new_val;
#else
  NotImplementedFatalError("Barrier_AtomicIncrement");
  return 0;
#endif
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
#if 0  // Not implemented
  __asm {
    mov mm0, value;  // Use mmx reg for 64-bit atomic moves
    mov ptr, mm0;
    emms;            // Empty mmx state to enable FP registers
  }
#else
  NotImplementedFatalError("NoBarrier_Store");
#endif
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_AtomicExchange(ptr, value);
  // acts as a barrier in this implementation
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_Store(ptr, value);
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
#if 0  // Not implemented
  Atomic64 value;
  __asm {
    mov mm0, ptr;    // Use mmx reg for 64-bit atomic moves
    mov value, mm0;
    emms;            // Empty mmx state to enable FP registers
  }
  return value;
#else
  NotImplementedFatalError("NoBarrier_Load");
  return 0;
#endif
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = NoBarrier_Load(ptr);
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return NoBarrier_Load(ptr);
}

#endif  // defined(_WIN64) || defined(__MINGW64__)


inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

}  // namespace base::subtle
}  // namespace base

#endif  // BASE_ATOMICOPS_INTERNALS_WINDOWS_H_