// pthread_rwlock.cpp, revision 08ee8d2030fbc73c4c144e819dd68806b0351cbe
/*
 * Copyright (C) 2010 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <errno.h>
#include <stdatomic.h>

#include "pthread_internal.h"
#include "private/bionic_futex.h"
#include "private/bionic_time_conversions.h"

/* Technical note:
 *
 * Possible states of a read/write lock:
 *
 *  - no readers and no writer (unlocked)
 *  - one or more readers sharing the lock at the same time (read-locked)
 *  - one writer holding the lock (write-locked)
 *
 * Additionally:
 *  - trying to acquire the write-lock while there are any readers blocks
 *  - trying to acquire the read-lock while there is a writer blocks
 *  - a single thread can acquire the lock multiple times in read mode
 *
 *  - POSIX states that behavior is undefined (may deadlock) if a thread tries
 *    to acquire the lock
 *      - in write mode while already holding the lock (whether in read or write mode)
 *      - in read mode while already holding the lock in write mode.
 *  - This implementation returns EDEADLK in the "write after write" and "read after
 *    write" cases and deadlocks in the "write after read" case.
 *
 * TODO: As it stands now, pending_readers and pending_writers could be merged into a
 * single waiters variable. Keeping them separate adds a bit of clarity and keeps
 * the door open for a writer-biased implementation.
 *
 */
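
/* State encoding used throughout this file (rwlock->state, accessed via STATE_ATOMIC_POINTER):
 *    0   unlocked
 *   >0   read-locked; the value is the number of readers
 *   -1   write-locked
 *
 * Illustrative caller-side sketch (not part of this file) of the EDEADLK behavior described
 * above; "lock" is a hypothetical, already-initialized pthread_rwlock_t:
 *
 *   pthread_rwlock_wrlock(&lock);   // ok: state 0 -> -1, writer_thread_id = caller's tid
 *   pthread_rwlock_wrlock(&lock);   // EDEADLK ("write after write")
 *   pthread_rwlock_rdlock(&lock);   // EDEADLK ("read after write")
 *   pthread_rwlock_unlock(&lock);   // state -1 -> 0, pending waiters are woken
 */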

#define RWLOCKATTR_DEFAULT     0
#define RWLOCKATTR_SHARED_MASK 0x0010

static inline bool rwlock_is_shared(const pthread_rwlock_t* rwlock) {
  return rwlock->attr == PTHREAD_PROCESS_SHARED;
}

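// Converts the absolute CLOCK_REALTIME deadline used by the timed lock functions into a
// relative timeout for the futex wait. Returns false if the deadline has already passed
// (callers then report ETIMEDOUT); a NULL abs_timeout means "wait without a timeout".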
static bool timespec_from_absolute(timespec* rel_timeout, const timespec* abs_timeout) {
  if (abs_timeout != NULL) {
    if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout, CLOCK_REALTIME)) {
      return false;
    }
  }
  return true;
}

int pthread_rwlockattr_init(pthread_rwlockattr_t* attr) {
  *attr = PTHREAD_PROCESS_PRIVATE;
  return 0;
}

int pthread_rwlockattr_destroy(pthread_rwlockattr_t* attr) {
  *attr = -1;
  return 0;
}

int pthread_rwlockattr_setpshared(pthread_rwlockattr_t* attr, int pshared) {
  switch (pshared) {
    case PTHREAD_PROCESS_PRIVATE:
    case PTHREAD_PROCESS_SHARED:
      *attr = pshared;
      return 0;
    default:
      return EINVAL;
  }
}

int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t* attr, int* pshared) {
  *pshared = *attr;
  return 0;
}

static inline atomic_int* STATE_ATOMIC_POINTER(pthread_rwlock_t* rwlock) {
    static_assert(sizeof(atomic_int) == sizeof(rwlock->state),
                  "rwlock->state should actually be atomic_int in implementation.");

    // We prefer casting to atomic_int here instead of declaring rwlock->state to be atomic_int
    // directly, because the latter would pollute pthread.h and cause an error when compiling libcxx.
    return reinterpret_cast<atomic_int*>(&rwlock->state);
}

static inline atomic_int* WRITER_THREAD_ID_ATOMIC_POINTER(pthread_rwlock_t* rwlock) {
    static_assert(sizeof(atomic_int) == sizeof(rwlock->writer_thread_id),
                  "rwlock->writer_thread_id should actually be atomic_int in implementation.");

    return reinterpret_cast<atomic_int*>(&rwlock->writer_thread_id);
}

static inline atomic_uint* PENDING_READERS_ATOMIC_POINTER(pthread_rwlock_t* rwlock) {
    static_assert(sizeof(atomic_uint) == sizeof(rwlock->pending_readers),
                  "rwlock->pending_readers should actually be atomic_uint in implementation.");

    return reinterpret_cast<atomic_uint*>(&rwlock->pending_readers);
}

static inline atomic_uint* PENDING_WRITERS_ATOMIC_POINTER(pthread_rwlock_t* rwlock) {
    static_assert(sizeof(atomic_uint) == sizeof(rwlock->pending_writers),
                  "rwlock->pending_writers should actually be atomic_uint in implementation.");

    return reinterpret_cast<atomic_uint*>(&rwlock->pending_writers);
}

int pthread_rwlock_init(pthread_rwlock_t* rwlock, const pthread_rwlockattr_t* attr) {
  if (__predict_true(attr == NULL)) {
    rwlock->attr = 0;
  } else {
    switch (*attr) {
      case PTHREAD_PROCESS_SHARED:
      case PTHREAD_PROCESS_PRIVATE:
        rwlock->attr = *attr;
        break;
      default:
        return EINVAL;
    }
  }

  atomic_init(STATE_ATOMIC_POINTER(rwlock), 0);
  atomic_init(WRITER_THREAD_ID_ATOMIC_POINTER(rwlock), 0);
  atomic_init(PENDING_READERS_ATOMIC_POINTER(rwlock), 0);
  atomic_init(PENDING_WRITERS_ATOMIC_POINTER(rwlock), 0);

  return 0;
}
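
// Illustrative sketch (not part of this file): initializing a process-shared lock so that
// rwlock_is_shared() is true and the futex waits below use shared futexes. "lock" is a
// hypothetical pthread_rwlock_t* assumed to point into memory mapped by every participating
// process.
//
//   pthread_rwlockattr_t attr;
//   pthread_rwlockattr_init(&attr);
//   pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
//   pthread_rwlock_init(lock, &attr);
//   pthread_rwlockattr_destroy(&attr);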

int pthread_rwlock_destroy(pthread_rwlock_t* rwlock) {
  if (rwlock->state != 0) {
    return EBUSY;
  }
  return 0;
}

static int __pthread_rwlock_timedrdlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) {
  if (__predict_false(__get_thread()->tid ==
      atomic_load_explicit(WRITER_THREAD_ID_ATOMIC_POINTER(rwlock), memory_order_relaxed))) {
    return EDEADLK;
  }

  timespec ts;
  timespec* rel_timeout = (abs_timeout == NULL) ? NULL : &ts;

  atomic_int* state_ptr = STATE_ATOMIC_POINTER(rwlock);

  while (true) {
    int cur_state = atomic_load_explicit(state_ptr, memory_order_relaxed);
    if (__predict_true(cur_state >= 0)) {
      if (atomic_compare_exchange_weak_explicit(state_ptr, &cur_state, cur_state + 1,
                                                memory_order_acquire, memory_order_relaxed)) {
        return 0;
      }
    } else {
      if (!timespec_from_absolute(rel_timeout, abs_timeout)) {
        return ETIMEDOUT;
      }
      atomic_uint* pending_readers_ptr = PENDING_READERS_ATOMIC_POINTER(rwlock);

      // To avoid losing wake-ups, the pending_readers increment must be observed before the
      // futex_wait by all threads. A seq_cst fence is used here instead of a seq_cst operation,
      // because only a seq_cst fence can ensure sequential consistency for the non-atomic
      // operations in futex_wait.
      atomic_fetch_add_explicit(pending_readers_ptr, 1, memory_order_relaxed);
      atomic_thread_fence(memory_order_seq_cst);
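      // This fence pairs with the seq_cst fence in pthread_rwlock_unlock() (see the diagram
      // before its wake-up code), which orders the unlocking thread's state update before its
      // read of pending_readers/pending_writers.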
      int ret = __futex_wait_ex(state_ptr, rwlock_is_shared(rwlock), cur_state, rel_timeout);
      atomic_fetch_sub_explicit(pending_readers_ptr, 1, memory_order_relaxed);
      if (ret == -ETIMEDOUT) {
        return ETIMEDOUT;
      }
    }
  }
}

static int __pthread_rwlock_timedwrlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) {
  if (__predict_false(__get_thread()->tid ==
      atomic_load_explicit(WRITER_THREAD_ID_ATOMIC_POINTER(rwlock), memory_order_relaxed))) {
    return EDEADLK;
  }

  timespec ts;
  timespec* rel_timeout = (abs_timeout == NULL) ? NULL : &ts;

  atomic_int* state_ptr = STATE_ATOMIC_POINTER(rwlock);

  while (true) {
    int cur_state = atomic_load_explicit(state_ptr, memory_order_relaxed);
    if (__predict_true(cur_state == 0)) {
      if (atomic_compare_exchange_weak_explicit(state_ptr, &cur_state, -1,
                                                memory_order_acquire, memory_order_relaxed)) {
        // writer_thread_id is protected by the rwlock and can only be modified by the thread
        // that owns the write lock. Other threads may read it for EDEADLK error checking, so a
        // relaxed atomic operation is safe enough for it.
        atomic_store_explicit(WRITER_THREAD_ID_ATOMIC_POINTER(rwlock), __get_thread()->tid,
                              memory_order_relaxed);
        return 0;
      }
    } else {
      if (!timespec_from_absolute(rel_timeout, abs_timeout)) {
        return ETIMEDOUT;
      }

      atomic_uint* pending_writers_ptr = PENDING_WRITERS_ATOMIC_POINTER(rwlock);

      // To avoid losing wake-ups, the pending_writers increment must be observed before the
      // futex_wait by all threads. A seq_cst fence is used here instead of a seq_cst operation,
      // because only a seq_cst fence can ensure sequential consistency for the non-atomic
      // operations in futex_wait.
      atomic_fetch_add_explicit(pending_writers_ptr, 1, memory_order_relaxed);
      atomic_thread_fence(memory_order_seq_cst);
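      // As in the read-lock path, this fence pairs with the seq_cst fence in
      // pthread_rwlock_unlock() before it reads pending_readers/pending_writers.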
      int ret = __futex_wait_ex(state_ptr, rwlock_is_shared(rwlock), cur_state, rel_timeout);
      atomic_fetch_sub_explicit(pending_writers_ptr, 1, memory_order_relaxed);
      if (ret == -ETIMEDOUT) {
        return ETIMEDOUT;
      }
    }
  }
}

int pthread_rwlock_rdlock(pthread_rwlock_t* rwlock) {
  return __pthread_rwlock_timedrdlock(rwlock, NULL);
}

int pthread_rwlock_timedrdlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) {
  return __pthread_rwlock_timedrdlock(rwlock, abs_timeout);
}

int pthread_rwlock_tryrdlock(pthread_rwlock_t* rwlock) {
  atomic_int* state_ptr = STATE_ATOMIC_POINTER(rwlock);
  int cur_state = atomic_load_explicit(state_ptr, memory_order_relaxed);

  while (cur_state >= 0) {
    if (atomic_compare_exchange_weak_explicit(state_ptr, &cur_state, cur_state + 1,
                                              memory_order_acquire, memory_order_relaxed)) {
      return 0;
    }
  }
  return EBUSY;
}

int pthread_rwlock_wrlock(pthread_rwlock_t* rwlock) {
  return __pthread_rwlock_timedwrlock(rwlock, NULL);
}

int pthread_rwlock_timedwrlock(pthread_rwlock_t* rwlock, const timespec* abs_timeout) {
  return __pthread_rwlock_timedwrlock(rwlock, abs_timeout);
}

int pthread_rwlock_trywrlock(pthread_rwlock_t* rwlock) {
  atomic_int* state_ptr = STATE_ATOMIC_POINTER(rwlock);
  int cur_state = atomic_load_explicit(state_ptr, memory_order_relaxed);

  while (cur_state == 0) {
    if (atomic_compare_exchange_weak_explicit(state_ptr, &cur_state, -1,
                                              memory_order_acquire, memory_order_relaxed)) {
      int tid = __get_thread()->tid;
      atomic_store_explicit(WRITER_THREAD_ID_ATOMIC_POINTER(rwlock), tid, memory_order_relaxed);
      return 0;
    }
  }
  return EBUSY;
}

int pthread_rwlock_unlock(pthread_rwlock_t* rwlock) {
  int tid = __get_thread()->tid;
  atomic_int* state_ptr = STATE_ATOMIC_POINTER(rwlock);
  atomic_uint* pending_readers_ptr = PENDING_READERS_ATOMIC_POINTER(rwlock);
  atomic_uint* pending_writers_ptr = PENDING_WRITERS_ATOMIC_POINTER(rwlock);

  int cur_state = atomic_load_explicit(state_ptr, memory_order_relaxed);
  if (__predict_false(cur_state == 0)) {
    return EPERM;
  } else if (cur_state == -1) {
    atomic_int* writer_thread_id_ptr = WRITER_THREAD_ID_ATOMIC_POINTER(rwlock);
    if (atomic_load_explicit(writer_thread_id_ptr, memory_order_relaxed) != tid) {
      return EPERM;
    }
    // We're no longer the owner.
    atomic_store_explicit(writer_thread_id_ptr, 0, memory_order_relaxed);
    // Change state from -1 to 0.
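    // The release ordering below pairs with the acquire ordering of the successful
    // compare_exchange in the lock paths, so memory accesses made while holding the lock
    // happen-before the next acquisition. (The reader-decrement CAS further down uses
    // release for the same reason.)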
    atomic_store_explicit(state_ptr, 0, memory_order_release);
    goto wakeup_waiters;

  } else { // cur_state > 0
    // Reduce state by 1.
    while (!atomic_compare_exchange_weak_explicit(state_ptr, &cur_state, cur_state - 1,
                                                  memory_order_release, memory_order_relaxed)) {
      if (cur_state <= 0) {
        return EPERM;
      }
    }
    if (cur_state == 1) {
      goto wakeup_waiters;
    }
  }
  return 0;

wakeup_waiters:
  // To avoid losing wake-ups, the update of state must be observed before the reads of
  // pending_readers/pending_writers by all threads. Using read locking as an example:
  //     read locking thread                        unlocking thread
  //      pending_readers++;                         state = 0;
  //      seq_cst fence                              seq_cst fence
  //      read state for futex_wait                  read pending_readers for futex_wake
  //
  // So when locking and unlocking threads run in parallel, we cannot end up in a situation
  // where the locking thread reads state as negative and needs to wait, while the unlocking
  // thread reads pending_readers as zero and decides it doesn't need to wake up any waiters.
  atomic_thread_fence(memory_order_seq_cst);
  if (__predict_false(atomic_load_explicit(pending_readers_ptr, memory_order_relaxed) > 0 ||
                      atomic_load_explicit(pending_writers_ptr, memory_order_relaxed) > 0)) {
    __futex_wake_ex(state_ptr, rwlock_is_shared(rwlock), INT_MAX);
  }
  return 0;
}