/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_ATOMIC_H_
#define ART_RUNTIME_ATOMIC_H_

#include <stdint.h>
#include <atomic>
#include <limits>
#include <vector>

#include "base/logging.h"
#include "base/macros.h"

namespace art {

class Mutex;

// QuasiAtomic encapsulates two separate facilities that we are
// trying to move away from: "quasiatomic" 64-bit operations
// and custom memory fences. For the time being, they remain
// exposed. Clients should be converted to use the Atomic class
// below whenever possible, and should eventually use C++11 atomics
// directly. The two facilities that do not have a good C++11 analog
// are ThreadFenceForConstructor and Atomic::*JavaData.
//
// NOTE: Two "quasiatomic" operations on the exact same memory address
// are guaranteed to operate atomically with respect to each other,
// but no guarantees are made about quasiatomic operations mixed with
// non-quasiatomic operations on the same address, nor about
// quasiatomic operations that are performed on partially-overlapping
// memory.
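//
// A minimal usage sketch (illustrative only; "shared_value" is a hypothetical
// field, not part of this header). On a 32-bit target without native atomic
// 64-bit loads and stores, a non-tearing read and write look like:
//
//   volatile int64_t shared_value = 0;
//   int64_t snapshot = QuasiAtomic::Read64(&shared_value);  // Never observes a torn value.
//   QuasiAtomic::Write64(&shared_value, snapshot + 1);      // The store itself cannot tear.
//
// Note that the read-modify-write above is not atomic as a whole; use Cas64
// (below) in a retry loop for that.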
class QuasiAtomic {
#if defined(__mips__) && !defined(__LP64__)
  static constexpr bool kNeedSwapMutexes = true;
#else
  static constexpr bool kNeedSwapMutexes = false;
#endif

 public:
  static void Startup();

  static void Shutdown();

  // Reads the 64-bit value at "addr" without tearing.
  static int64_t Read64(volatile const int64_t* addr) {
    if (!kNeedSwapMutexes) {
      int64_t value;
#if defined(__LP64__)
      value = *addr;
#else
#if defined(__arm__)
#if defined(__ARM_FEATURE_LPAE)
      // With LPAE support (such as Cortex-A15), ldrd is defined not to tear.
      __asm__ __volatile__("@ QuasiAtomic::Read64\n"
        "ldrd     %0, %H0, %1"
        : "=r" (value)
        : "m" (*addr));
#else
      // Exclusive loads are defined not to tear; clearing the exclusive state isn't necessary.
      __asm__ __volatile__("@ QuasiAtomic::Read64\n"
        "ldrexd     %0, %H0, %1"
        : "=r" (value)
        : "Q" (*addr));
#endif
#elif defined(__i386__)
      // A single movq through an SSE register ("x" constraint) reads all 64 bits at once.
      __asm__ __volatile__(
        "movq     %1, %0\n"
        : "=x" (value)
        : "m" (*addr));
#else
      LOG(FATAL) << "Unsupported architecture";
#endif
#endif  // defined(__LP64__)
      return value;
    } else {
      return SwapMutexRead64(addr);
    }
  }

  // Writes the 64-bit value at "addr" without tearing.
  static void Write64(volatile int64_t* addr, int64_t value) {
    if (!kNeedSwapMutexes) {
#if defined(__LP64__)
      *addr = value;
#else
#if defined(__arm__)
#if defined(__ARM_FEATURE_LPAE)
      // If the ARM architecture has LPAE (such as Cortex-A15), strd is defined not to tear.
      __asm__ __volatile__("@ QuasiAtomic::Write64\n"
        "strd     %1, %H1, %0"
        : "=m" (*addr)
        : "r" (value));
#else
      // Done as a swap so that the cache line is in the exclusive state for the store.
      int64_t prev;
      int status;
      do {
        __asm__ __volatile__("@ QuasiAtomic::Write64\n"
          "ldrexd     %0, %H0, %2\n"
          "strexd     %1, %3, %H3, %2"
          : "=&r" (prev), "=&r" (status), "+Q" (*addr)
          : "r" (value)
          : "cc");
      } while (UNLIKELY(status != 0));
#endif
#elif defined(__i386__)
      // A single movq from an SSE register ("x" constraint) writes all 64 bits at once.
      __asm__ __volatile__(
        "movq     %1, %0"
        : "=m" (*addr)
        : "x" (value));
#else
      LOG(FATAL) << "Unsupported architecture";
#endif
#endif  // defined(__LP64__)
    } else {
      SwapMutexWrite64(addr, value);
    }
  }

  // Atomically compare the value at "addr" to "old_value"; if equal, replace it with "new_value"
  // and return true. Otherwise, don't swap and return false.
  // This is fully ordered, i.e. it has C++11 memory_order_seq_cst
  // semantics (assuming all other accesses use a mutex if this one does).
  // This has "strong" semantics; if it fails then it is guaranteed that
  // at some point during the execution of Cas64, *addr was not equal to
  // old_value.
  static bool Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
    if (!kNeedSwapMutexes) {
      return __sync_bool_compare_and_swap(addr, old_value, new_value);
    } else {
      return SwapMutexCas64(old_value, new_value, addr);
    }
  }
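
  // A sketch of the usual retry idiom built on Cas64 (illustrative only;
  // "AddToCounter" and "counter" are hypothetical, not part of this class):
  //
  //   void AddToCounter(volatile int64_t* counter, int64_t delta) {
  //     int64_t old_value;
  //     do {
  //       old_value = QuasiAtomic::Read64(counter);
  //     } while (!QuasiAtomic::Cas64(old_value, old_value + delta, counter));
  //   }
  //
  // Because Cas64 has strong semantics, a failure always means another thread
  // changed *counter, so the loop cannot spin on spurious failures.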

  // Does the architecture provide reasonable atomic long operations or do we fall back on mutexes?
  static bool LongAtomicsUseMutexes() {
    return kNeedSwapMutexes;
  }

  static void ThreadFenceAcquire() {
    std::atomic_thread_fence(std::memory_order_acquire);
  }

  static void ThreadFenceRelease() {
    std::atomic_thread_fence(std::memory_order_release);
  }

  static void ThreadFenceForConstructor() {
    #if defined(__aarch64__)
      __asm__ __volatile__("dmb ishst" : : : "memory");
    #else
      std::atomic_thread_fence(std::memory_order_release);
    #endif
  }
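
  // A sketch of the publication pattern this fence supports (illustrative
  // only; "Foo" and "published_foo" are hypothetical): fully initialize an
  // object, fence, then make it reachable by other threads:
  //
  //   Foo* obj = new Foo();                      // Initialize all fields.
  //   QuasiAtomic::ThreadFenceForConstructor();  // Order the field stores before the publish.
  //   published_foo = obj;                       // Publish the pointer.
  //
  // On aarch64 this is a store-store barrier (dmb ishst); elsewhere it falls
  // back to a release fence.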

  static void ThreadFenceSequentiallyConsistent() {
    std::atomic_thread_fence(std::memory_order_seq_cst);
  }

 private:
  static Mutex* GetSwapMutex(const volatile int64_t* addr);
  static int64_t SwapMutexRead64(volatile const int64_t* addr);
  static void SwapMutexWrite64(volatile int64_t* addr, int64_t val);
  static bool SwapMutexCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr);

  // We stripe across a bunch of different mutexes to reduce contention.
  static constexpr size_t kSwapMutexCount = 32;
  static std::vector<Mutex*>* gSwapMutexes;
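
  // One plausible shape for GetSwapMutex (a sketch only; the real definition
  // lives in the corresponding .cc file and may differ): hash the address to
  // pick one of the kSwapMutexCount striped mutexes, e.g.
  //
  //   Mutex* QuasiAtomic::GetSwapMutex(const volatile int64_t* addr) {
  //     size_t index = (reinterpret_cast<uintptr_t>(addr) >> 3) % kSwapMutexCount;
  //     return (*gSwapMutexes)[index];
  //   }
  //
  // Striping by address keeps unrelated 64-bit locations from contending on a
  // single global lock.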

  DISALLOW_COPY_AND_ASSIGN(QuasiAtomic);
};

template<typename T>
class PACKED(sizeof(T)) Atomic : public std::atomic<T> {
 public:
  Atomic() : std::atomic<T>(0) { }

  explicit Atomic(T value) : std::atomic<T>(value) { }

  // Load from memory without ordering or synchronization constraints.
  T LoadRelaxed() const {
    return this->load(std::memory_order_relaxed);
  }

  // Word tearing allowed, but may race.
  // TODO: Optimize?
  // There has been some discussion of eventually disallowing word
  // tearing for Java data loads.
  T LoadJavaData() const {
    return this->load(std::memory_order_relaxed);
  }

  // Load from memory with a total ordering.
  // Corresponds exactly to a Java volatile load.
  T LoadSequentiallyConsistent() const {
    return this->load(std::memory_order_seq_cst);
  }

  // Store to memory without ordering or synchronization constraints.
  void StoreRelaxed(T desired) {
    this->store(desired, std::memory_order_relaxed);
  }

  // Word tearing allowed, but may race.
  void StoreJavaData(T desired) {
    this->store(desired, std::memory_order_relaxed);
  }

  // Store to memory with release ordering.
  void StoreRelease(T desired) {
    this->store(desired, std::memory_order_release);
  }

  // Store to memory with a total ordering.
  void StoreSequentiallyConsistent(T desired) {
    this->store(desired, std::memory_order_seq_cst);
  }

  // Atomically replace the value with desired value if it matches the expected value.
  // Participates in total ordering of atomic operations.
  bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) {
    return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_seq_cst);
  }

  // The same, except it may fail spuriously.
  bool CompareExchangeWeakSequentiallyConsistent(T expected_value, T desired_value) {
    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_seq_cst);
  }

  // Atomically replace the value with desired value if it matches the expected value. Doesn't
  // imply ordering or synchronization constraints.
  bool CompareExchangeStrongRelaxed(T expected_value, T desired_value) {
    return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_relaxed);
  }

  // The same, except it may fail spuriously.
  bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed);
  }

  // Atomically replace the value with desired value if it matches the expected value. Prior writes
  // made to other memory locations by the thread that did the release become visible in this
  // thread.
  bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_acquire);
  }

  // Atomically replace the value with desired value if it matches the expected value. Prior writes
  // to other memory locations become visible to the threads that do a consume or an acquire on the
  // same location.
  bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release);
  }
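
  // A sketch of the usual weak-CAS retry loop (illustrative only; "UpdateMax"
  // is a hypothetical helper, not part of this class). Spurious failures from
  // the weak variants are harmless when the operation is retried in a loop:
  //
  //   void UpdateMax(Atomic<int32_t>* max, int32_t candidate) {
  //     int32_t cur = max->LoadRelaxed();
  //     while (candidate > cur &&
  //            !max->CompareExchangeWeakSequentiallyConsistent(cur, candidate)) {
  //       cur = max->LoadRelaxed();
  //     }
  //   }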

  T FetchAndAddSequentiallyConsistent(const T value) {
    return this->fetch_add(value, std::memory_order_seq_cst);  // Return the old value.
  }

  T FetchAndSubSequentiallyConsistent(const T value) {
    return this->fetch_sub(value, std::memory_order_seq_cst);  // Return the old value.
  }

  T FetchAndOrSequentiallyConsistent(const T value) {
    return this->fetch_or(value, std::memory_order_seq_cst);  // Return the old value.
  }

  T FetchAndAndSequentiallyConsistent(const T value) {
    return this->fetch_and(value, std::memory_order_seq_cst);  // Return the old value.
  }

  volatile T* Address() {
    return reinterpret_cast<T*>(this);
  }

  static T MaxValue() {
    return std::numeric_limits<T>::max();
  }
};

typedef Atomic<int32_t> AtomicInteger;
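
// A small usage sketch (illustrative only; "gCounter" is a hypothetical
// variable, not part of this header):
//
//   AtomicInteger gCounter(0);
//   int32_t prev = gCounter.FetchAndAddSequentiallyConsistent(1);  // Value before the add.
//   int32_t now = gCounter.LoadSequentiallyConsistent();           // Java-volatile-style load.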

COMPILE_ASSERT(sizeof(AtomicInteger) == sizeof(int32_t), weird_atomic_int_size);
COMPILE_ASSERT(alignof(AtomicInteger) == alignof(int32_t),
               atomic_int_alignment_differs_from_that_of_underlying_type);
COMPILE_ASSERT(sizeof(Atomic<int64_t>) == sizeof(int64_t), weird_atomic_int64_size);

// Assert that 64-bit integers are 8-byte aligned. This isn't guaranteed on certain 32-bit
// architectures (e.g. x86-32), but we know that the 64-bit integers used here are arranged to be
// 8-byte aligned.
#if defined(__LP64__)
  COMPILE_ASSERT(alignof(Atomic<int64_t>) == alignof(int64_t),
                 atomic_int64_alignment_differs_from_that_of_underlying_type);
#endif

}  // namespace art

#endif  // ART_RUNTIME_ATOMIC_H_