/* Copyright (c) 2006, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * Author: Sanjay Ghemawat
 */

// Implementation of atomic operations using Windows API
// functions.  This file should not be included directly.  Clients
// should instead include "base/atomicops.h".
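//
// A minimal usage sketch, illustrative only and not part of this header; it
// assumes the "base/atomicops.h" wrapper exposes these operations unchanged:
//
//   #include "base/atomicops.h"
//
//   Atomic32 flag = 0;
//   // Try to claim the flag; the CAS returns the previous value of *ptr.
//   if (base::subtle::NoBarrier_CompareAndSwap(&flag, 0, 1) == 0) {
//     // This thread won the race and now owns the flag.
//   }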

#ifndef BASE_ATOMICOPS_INTERNALS_WINDOWS_H_
#define BASE_ATOMICOPS_INTERNALS_WINDOWS_H_

#include <stdio.h>
#include <stdlib.h>
#include "base/basictypes.h"  // For COMPILE_ASSERT

typedef int32 Atomic32;

#if defined(_WIN64)
#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
#endif

namespace base {
namespace subtle {

typedef int64 Atomic64;

// 32-bit low-level operations on any platform

extern "C" {
// We use windows intrinsics when we can (they seem to be supported
// well on MSVC 8.0 and above).  Unfortunately, in some
// environments, <windows.h> and <intrin.h> have conflicting
// declarations of some other intrinsics, breaking compilation:
//   http://connect.microsoft.com/VisualStudio/feedback/details/262047
// Therefore, we simply declare the relevant intrinsics ourselves.

// MinGW has a bug in the header files where it doesn't indicate the
// first argument is volatile -- they're not up to date.  See
//   http://readlist.com/lists/lists.sourceforge.net/mingw-users/0/3861.html
// We have to const_cast away the volatile to avoid compiler warnings.
// TODO(csilvers): remove this once MinGW has updated MinGW/include/winbase.h
#if defined(__MINGW32__)
inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
                                           LONG newval, LONG oldval) {
  return ::InterlockedCompareExchange(const_cast<LONG*>(ptr), newval, oldval);
}
inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
  return ::InterlockedExchange(const_cast<LONG*>(ptr), newval);
}
inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
  return ::InterlockedExchangeAdd(const_cast<LONG*>(ptr), increment);
}

#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
// Unfortunately, in some environments, <windows.h> and <intrin.h>
// have conflicting declarations of some intrinsics, breaking
// compilation.  So we declare the intrinsics we need ourselves.  See
//   http://connect.microsoft.com/VisualStudio/feedback/details/262047
LONG _InterlockedCompareExchange(volatile LONG* ptr, LONG newval, LONG oldval);
#pragma intrinsic(_InterlockedCompareExchange)
inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
                                           LONG newval, LONG oldval) {
  return _InterlockedCompareExchange(ptr, newval, oldval);
}

LONG _InterlockedExchange(volatile LONG* ptr, LONG newval);
#pragma intrinsic(_InterlockedExchange)
inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
  return _InterlockedExchange(ptr, newval);
}

LONG _InterlockedExchangeAdd(volatile LONG* ptr, LONG increment);
#pragma intrinsic(_InterlockedExchangeAdd)
inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
  return _InterlockedExchangeAdd(ptr, increment);
}

#else
inline LONG FastInterlockedCompareExchange(volatile LONG* ptr,
                                           LONG newval, LONG oldval) {
  return ::InterlockedCompareExchange(ptr, newval, oldval);
}
inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) {
  return ::InterlockedExchange(ptr, newval);
}
inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) {
  return ::InterlockedExchangeAdd(ptr, increment);
}

#endif  // ifdef __MINGW32__
}  // extern "C"

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  LONG result = FastInterlockedCompareExchange(
      reinterpret_cast<volatile LONG*>(ptr),
      static_cast<LONG>(new_value),
      static_cast<LONG>(old_value));
  return static_cast<Atomic32>(result);
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  LONG result = FastInterlockedExchange(
      reinterpret_cast<volatile LONG*>(ptr),
      static_cast<LONG>(new_value));
  return static_cast<Atomic32>(result);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  return FastInterlockedExchangeAdd(
      reinterpret_cast<volatile LONG*>(ptr),
      static_cast<LONG>(increment)) + increment;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}
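
// Illustrative note, not part of this header: the interlocked add returns the
// value of *ptr before the addition, so adding `increment` back yields the
// post-increment value.  A hypothetical reference-count drop could be written
// as:
//
//   if (base::subtle::Barrier_AtomicIncrement(&refcount, -1) == 0) {
//     // Last reference released; safe to destroy the object.
//   }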

}  // namespace base::subtle
}  // namespace base


// In msvc8/vs2005, winnt.h already contains a definition for
// MemoryBarrier in the global namespace.  Add it there for earlier
// versions and forward to it from within the namespace.
#if !(defined(_MSC_VER) && _MSC_VER >= 1400)
inline void MemoryBarrier() {
  Atomic32 value = 0;
  base::subtle::NoBarrier_AtomicExchange(&value, 0);
                        // actually acts as a barrier in this implementation
}
#endif

namespace base {
namespace subtle {

inline void MemoryBarrier() {
  ::MemoryBarrier();
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  NoBarrier_AtomicExchange(ptr, value);
              // acts as a barrier in this implementation
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value; // works w/o barrier for current Intel chips as of June 2005
  // See comments in Atomic64 version of Release_Store() below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
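
// An illustrative pairing of these operations (not part of this header),
// assuming hypothetical variables `data` and `ready`: the release store
// publishes writes made before it, and the matching acquire load makes them
// visible to the reader before `data` is examined.
//
//   // Producer thread:
//   data = 42;                               // ordinary write
//   base::subtle::Release_Store(&ready, 1);  // publish
//
//   // Consumer thread:
//   if (base::subtle::Acquire_Load(&ready) == 1) {
//     // The write to `data` is visible here.
//   }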

// 64-bit operations

#if defined(_WIN64) || defined(__MINGW64__)

// 64-bit low-level operations on 64-bit platform.

COMPILE_ASSERT(sizeof(Atomic64) == sizeof(PVOID), atomic_word_is_atomic);

// These are the intrinsics needed for 64-bit operations.  Similar to the
// 32-bit case above.

extern "C" {
#if defined(__MINGW64__)
inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
                                                   PVOID newval, PVOID oldval) {
  return ::InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
                                             newval, oldval);
}
inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
  return ::InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
}
inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
                                             LONGLONG increment) {
  return ::InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
}

#elif _MSC_VER >= 1400   // intrinsics didn't work so well before MSVC 8.0
// Like above, we need to declare the intrinsics ourselves.
PVOID _InterlockedCompareExchangePointer(volatile PVOID* ptr,
                                         PVOID newval, PVOID oldval);
#pragma intrinsic(_InterlockedCompareExchangePointer)
inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
                                                   PVOID newval, PVOID oldval) {
  return _InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr),
                                            newval, oldval);
}

PVOID _InterlockedExchangePointer(volatile PVOID* ptr, PVOID newval);
#pragma intrinsic(_InterlockedExchangePointer)
inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
  return _InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval);
}

LONGLONG _InterlockedExchangeAdd64(volatile LONGLONG* ptr, LONGLONG increment);
#pragma intrinsic(_InterlockedExchangeAdd64)
inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
                                             LONGLONG increment) {
  return _InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment);
}

#else
inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr,
                                                   PVOID newval, PVOID oldval) {
  return ::InterlockedCompareExchangePointer(ptr, newval, oldval);
}
inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) {
  return ::InterlockedExchangePointer(ptr, newval);
}
inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr,
                                             LONGLONG increment) {
  return ::InterlockedExchangeAdd64(ptr, increment);
}

#endif  // ifdef __MINGW64__
}  // extern "C"

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  PVOID result = FastInterlockedCompareExchangePointer(
    reinterpret_cast<volatile PVOID*>(ptr),
    reinterpret_cast<PVOID>(new_value), reinterpret_cast<PVOID>(old_value));
  return reinterpret_cast<Atomic64>(result);
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  PVOID result = FastInterlockedExchangePointer(
    reinterpret_cast<volatile PVOID*>(ptr),
    reinterpret_cast<PVOID>(new_value));
  return reinterpret_cast<Atomic64>(result);
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  return FastInterlockedExchangeAdd64(
      reinterpret_cast<volatile LONGLONG*>(ptr),
      static_cast<LONGLONG>(increment)) + increment;
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_AtomicExchange(ptr, value);
              // acts as a barrier in this implementation
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value; // works w/o barrier for current Intel chips as of June 2005

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

#else  // defined(_WIN64) || defined(__MINGW64__)

// 64-bit low-level operations on 32-bit platform

// TODO(vchen): The GNU assembly below must be converted to MSVC inline
// assembly.  Then the file should be renamed to ...-x86-msvc.h, probably.

inline void NotImplementedFatalError(const char *function_name) {
  fprintf(stderr, "64-bit %s() not implemented on this platform\n",
          function_name);
  abort();
}

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
#if 0 // Not implemented
  Atomic64 prev;
  __asm__ __volatile__("movl (%3), %%ebx\n\t"    // Move 64-bit new_value into
                       "movl 4(%3), %%ecx\n\t"   // ecx:ebx
                       "lock; cmpxchg8b %1\n\t"  // If edx:eax (old_value) same
                       : "=A" (prev)             // as contents of ptr:
                       : "m" (*ptr),             //   ecx:ebx => ptr
                         "0" (old_value),        // else:
                         "r" (&new_value)        //   old *ptr => edx:eax
                       : "memory", "%ebx", "%ecx");
  return prev;
#else
  NotImplementedFatalError("NoBarrier_CompareAndSwap");
  return 0;
#endif
}
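
// A possible MSVC inline-assembly rendering of the cmpxchg8b sequence above,
// offered only as an untested sketch toward the TODO(vchen) note; the
// disabled GNU assembly remains the reference, and the register/operand
// details would need verification against a real MSVC build:
//
//   Atomic64 prev;
//   __asm {
//     mov esi, ptr                       ; esi -> target 64-bit word
//     mov eax, dword ptr [old_value]     ; edx:eax = old_value
//     mov edx, dword ptr [old_value + 4]
//     mov ebx, dword ptr [new_value]     ; ecx:ebx = new_value
//     mov ecx, dword ptr [new_value + 4]
//     lock cmpxchg8b qword ptr [esi]     ; compare-and-swap *ptr
//     mov dword ptr [prev], eax          ; edx:eax = previous *ptr
//     mov dword ptr [prev + 4], edx
//   }
//   return prev;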

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
#if 0 // Not implemented
  __asm__ __volatile__(
                       "movl (%2), %%ebx\n\t"    // Move 64-bit new_value into
                       "movl 4(%2), %%ecx\n\t"   // ecx:ebx
                       "0:\n\t"
                       "movl %1, %%eax\n\t"      // Read contents of ptr into
                       "movl 4%1, %%edx\n\t"     // edx:eax
                       "lock; cmpxchg8b %1\n\t"  // Attempt cmpxchg; if *ptr
                       "jnz 0b\n\t"              // is no longer edx:eax, loop
                       : "=A" (new_value)
                       : "m" (*ptr),
                         "r" (&new_value)
                       : "memory", "%ebx", "%ecx");
  return new_value;  // Now it's the previous value.
#else
  NotImplementedFatalError("NoBarrier_AtomicExchange");
  return 0;
#endif
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
#if 0 // Not implemented
  Atomic64 temp = increment;
  __asm__ __volatile__(
                       "0:\n\t"
                       "movl (%3), %%ebx\n\t"    // Move 64-bit increment into
                       "movl 4(%3), %%ecx\n\t"   // ecx:ebx
                       "movl (%2), %%eax\n\t"    // Read contents of ptr into
                       "movl 4(%2), %%edx\n\t"   // edx:eax
                       "add %%eax, %%ebx\n\t"    // sum => ecx:ebx
                       "adc %%edx, %%ecx\n\t"    // edx:eax still has old *ptr
                       "lock; cmpxchg8b (%2)\n\t"// Attempt cmpxchg; if *ptr
                       "jnz 0b\n\t"              // is no longer edx:eax, loop
                       : "=A"(temp), "+m"(*ptr)
                       : "D" (ptr), "S" (&increment)
                       : "memory", "%ebx", "%ecx");
  // temp now contains the previous value of *ptr
  return temp + increment;
#else
  NotImplementedFatalError("NoBarrier_AtomicIncrement");
  return 0;
#endif
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
#if 0 // Not implemented
  Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return new_val;
#else
  NotImplementedFatalError("Barrier_AtomicIncrement");
  return 0;
#endif
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
#if 0 // Not implemented
  __asm {
    mov mm0, value;  // Use mmx reg for 64-bit atomic moves
    mov ptr, mm0;
    emms;            // Empty mmx state to enable FP registers
  }
#else
  NotImplementedFatalError("NoBarrier_Store");
#endif
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_AtomicExchange(ptr, value);
              // acts as a barrier in this implementation
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  NoBarrier_Store(ptr, value);
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
#if 0 // Not implemented
  Atomic64 value;
  __asm {
    mov mm0, ptr;    // Use mmx reg for 64-bit atomic moves
    mov value, mm0;
    emms;            // Empty mmx state to enable FP registers
  }
  return value;
#else
  NotImplementedFatalError("NoBarrier_Load");
  return 0;
#endif
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = NoBarrier_Load(ptr);
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return NoBarrier_Load(ptr);
}

#endif  // defined(_WIN64) || defined(__MINGW64__)


inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

}  // namespace base::subtle
}  // namespace base

#endif  // BASE_ATOMICOPS_INTERNALS_WINDOWS_H_