pthread_rwlock.cpp revision 76615dae93c18ac890e167c547a08c0228709a33
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *  * Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 *  * Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in
12 *    the documentation and/or other materials provided with the
13 *    distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <errno.h>
30#include <stdatomic.h>
31#include <string.h>
32
33#include "pthread_internal.h"
34#include "private/bionic_futex.h"
35#include "private/bionic_lock.h"
36#include "private/bionic_time_conversions.h"
37
38/* Technical note:
39 *
40 * Possible states of a read/write lock:
41 *
42 *  - no readers and no writer (unlocked)
43 *  - one or more readers sharing the lock at the same time (read-locked)
44 *  - one writer holding the lock (write-lock)
45 *
46 * Additionally:
47 *  - trying to get the write-lock while there are any readers blocks
48 *  - trying to get the read-lock while there is a writer blocks
49 *  - a single thread can acquire the lock multiple times in read mode
50 *
51 *  - Posix states that behavior is undefined (may deadlock) if a thread tries
52 *    to acquire the lock
53 *      - in write mode while already holding the lock (whether in read or write mode)
54 *      - in read mode while already holding the lock in write mode.
55 *  - This implementation will return EDEADLK in "write after write" and "read after
56 *    write" cases and will deadlock in write after read case.
57 *
58 */
59
60// A rwlockattr is implemented as a 32-bit integer which has following fields:
61//  bits    name              description
62//   1     rwlock_kind       have rwlock preference like PTHREAD_RWLOCK_PREFER_READER_NP.
63//   0      process_shared    set to 1 if the rwlock is shared between processes.
64
65#define RWLOCKATTR_PSHARED_SHIFT 0
66#define RWLOCKATTR_KIND_SHIFT    1
67
68#define RWLOCKATTR_PSHARED_MASK  1
69#define RWLOCKATTR_KIND_MASK     2
70#define RWLOCKATTR_RESERVED_MASK (~3)
71
72static inline __always_inline __always_inline bool __rwlockattr_getpshared(const pthread_rwlockattr_t* attr) {
73  return (*attr & RWLOCKATTR_PSHARED_MASK) >> RWLOCKATTR_PSHARED_SHIFT;
74}
75
76static inline __always_inline __always_inline void __rwlockattr_setpshared(pthread_rwlockattr_t* attr, int pshared) {
77  *attr = (*attr & ~RWLOCKATTR_PSHARED_MASK) | (pshared << RWLOCKATTR_PSHARED_SHIFT);
78}
79
80static inline __always_inline int __rwlockattr_getkind(const pthread_rwlockattr_t* attr) {
81  return (*attr & RWLOCKATTR_KIND_MASK) >> RWLOCKATTR_KIND_SHIFT;
82}
83
84static inline __always_inline void __rwlockattr_setkind(pthread_rwlockattr_t* attr, int kind) {
85  *attr = (*attr & ~RWLOCKATTR_KIND_MASK) | (kind << RWLOCKATTR_KIND_SHIFT);
86}
87
88
89int pthread_rwlockattr_init(pthread_rwlockattr_t* attr) {
90  *attr = 0;
91  return 0;
92}
93
94int pthread_rwlockattr_destroy(pthread_rwlockattr_t* attr) {
95  *attr = -1;
96  return 0;
97}
98
99int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t* attr, int* pshared) {
100  if (__rwlockattr_getpshared(attr)) {
101    *pshared = PTHREAD_PROCESS_SHARED;
102  } else {
103    *pshared = PTHREAD_PROCESS_PRIVATE;
104  }
105  return 0;
106}
107
108int pthread_rwlockattr_setpshared(pthread_rwlockattr_t* attr, int pshared) {
109  switch (pshared) {
110    case PTHREAD_PROCESS_PRIVATE:
111      __rwlockattr_setpshared(attr, 0);
112      return 0;
113    case PTHREAD_PROCESS_SHARED:
114      __rwlockattr_setpshared(attr, 1);
115      return 0;
116    default:
117      return EINVAL;
118  }
119}
120
121int pthread_rwlockattr_getkind_np(const pthread_rwlockattr_t* attr, int* pref) {
122  *pref = __rwlockattr_getkind(attr);
123  return 0;
124}
125
126int pthread_rwlockattr_setkind_np(pthread_rwlockattr_t* attr, int pref) {
127  switch (pref) {
128    case PTHREAD_RWLOCK_PREFER_READER_NP:   // Fall through.
129    case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP:
130      __rwlockattr_setkind(attr, pref);
131      return 0;
132    default:
133      return EINVAL;
134  }
135}
136
137// A rwlock state is implemented as a 32-bit integer which has following rules:
138//  bits      name                              description
139//   31      owned_by_writer_flag              set to 1 if the lock is owned by a writer now.
140//  30-2     reader_count                      the count of readers holding the lock.
141//   1       have_pending_writers              set to 1 if having pending writers.
142//   0       have_pending_readers              set to 1 if having pending readers.
143
144#define STATE_HAVE_PENDING_READERS_SHIFT    0
145#define STATE_HAVE_PENDING_WRITERS_SHIFT    1
146#define STATE_READER_COUNT_SHIFT            2
147#define STATE_OWNED_BY_WRITER_SHIFT        31
148
149#define STATE_HAVE_PENDING_READERS_FLAG     (1 << STATE_HAVE_PENDING_READERS_SHIFT)
150#define STATE_HAVE_PENDING_WRITERS_FLAG     (1 << STATE_HAVE_PENDING_WRITERS_SHIFT)
151#define STATE_READER_COUNT_CHANGE_STEP  (1 << STATE_READER_COUNT_SHIFT)
152#define STATE_OWNED_BY_WRITER_FLAG      (1 << STATE_OWNED_BY_WRITER_SHIFT)
153
154#define STATE_HAVE_PENDING_READERS_OR_WRITERS_FLAG \
155          (STATE_HAVE_PENDING_READERS_FLAG | STATE_HAVE_PENDING_WRITERS_FLAG)
156
157struct pthread_rwlock_internal_t {
158  atomic_int state;
159  atomic_int writer_tid;
160
161  bool pshared;
162  bool writer_nonrecursive_preferred;
163  uint16_t __pad;
164
165// When a reader thread plans to suspend on the rwlock, it will add STATE_HAVE_PENDING_READERS_FLAG
166// in state, increase pending_reader_count, and wait on pending_reader_wakeup_serial. After woken
167// up, the reader thread decreases pending_reader_count, and the last pending reader thread should
168// remove STATE_HAVE_PENDING_READERS_FLAG in state. A pending writer thread works in a similar way,
169// except that it uses flag and members for writer threads.
170
171  Lock pending_lock;  // All pending members below are protected by pending_lock.
172  uint32_t pending_reader_count;  // Count of pending reader threads.
173  uint32_t pending_writer_count;  // Count of pending writer threads.
174  uint32_t pending_reader_wakeup_serial;  // Pending reader threads wait on this address by futex_wait.
175  uint32_t pending_writer_wakeup_serial;  // Pending writer threads wait on this address by futex_wait.
176
177#if defined(__LP64__)
178  char __reserved[20];
179#else
180  char __reserved[4];
181#endif
182};
183
184static inline __always_inline bool __state_owned_by_writer(int state) {
185  return state < 0;
186}
187
188static inline __always_inline bool __state_owned_by_readers(int state) {
189  // If state >= 0, the owned_by_writer_flag is not set.
190  // And if state >= STATE_READER_COUNT_CHANGE_STEP, the reader_count field is not empty.
191  return state >= STATE_READER_COUNT_CHANGE_STEP;
192}
193
194static inline __always_inline bool __state_owned_by_readers_or_writer(int state) {
195  return state < 0 || state >= STATE_READER_COUNT_CHANGE_STEP;
196}
197
198static inline __always_inline int __state_add_writer_flag(int state) {
199  return state | STATE_OWNED_BY_WRITER_FLAG;
200}
201
202static inline __always_inline bool __state_is_last_reader(int state) {
203  return (state >> STATE_READER_COUNT_SHIFT) == 1;
204}
205
206static inline __always_inline bool __state_have_pending_writers(int state) {
207  return state & STATE_HAVE_PENDING_WRITERS_FLAG;
208}
209
210static inline __always_inline bool __state_have_pending_readers_or_writers(int state) {
211  return state & STATE_HAVE_PENDING_READERS_OR_WRITERS_FLAG;
212}
213
214static_assert(sizeof(pthread_rwlock_t) == sizeof(pthread_rwlock_internal_t),
215              "pthread_rwlock_t should actually be pthread_rwlock_internal_t in implementation.");
216
217// For binary compatibility with old version of pthread_rwlock_t, we can't use more strict
218// alignment than 4-byte alignment.
219static_assert(alignof(pthread_rwlock_t) == 4,
220             "pthread_rwlock_t should fulfill the alignment requirement of pthread_rwlock_internal_t.");
221
222static inline __always_inline pthread_rwlock_internal_t* __get_internal_rwlock(pthread_rwlock_t* rwlock_interface) {
223  return reinterpret_cast<pthread_rwlock_internal_t*>(rwlock_interface);
224}
225
226int pthread_rwlock_init(pthread_rwlock_t* rwlock_interface, const pthread_rwlockattr_t* attr) {
227  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
228
229  memset(rwlock, 0, sizeof(pthread_rwlock_internal_t));
230
231  if (__predict_false(attr != NULL)) {
232    rwlock->pshared = __rwlockattr_getpshared(attr);
233    int kind = __rwlockattr_getkind(attr);
234    switch (kind) {
235      case PTHREAD_RWLOCK_PREFER_READER_NP:
236        rwlock->writer_nonrecursive_preferred = false;
237        break;
238      case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP:
239        rwlock->writer_nonrecursive_preferred = true;
240        break;
241      default:
242        return EINVAL;
243    }
244    if ((*attr & RWLOCKATTR_RESERVED_MASK) != 0) {
245      return EINVAL;
246    }
247  }
248
249  atomic_init(&rwlock->state, 0);
250  rwlock->pending_lock.init(rwlock->pshared);
251  return 0;
252}
253
254int pthread_rwlock_destroy(pthread_rwlock_t* rwlock_interface) {
255  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
256
257  if (atomic_load_explicit(&rwlock->state, memory_order_relaxed) != 0) {
258    return EBUSY;
259  }
260  return 0;
261}
262
263static inline __always_inline bool __can_acquire_read_lock(int old_state,
264                                                             bool writer_nonrecursive_preferred) {
265  // If writer is preferred with nonrecursive reader, we prevent further readers from acquiring
266  // the lock when there are writers waiting for the lock.
267  bool cannot_apply = __state_owned_by_writer(old_state) ||
268                      (writer_nonrecursive_preferred && __state_have_pending_writers(old_state));
269  return !cannot_apply;
270}
271
272static inline __always_inline int __pthread_rwlock_tryrdlock(pthread_rwlock_internal_t* rwlock) {
273  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
274
275  while (__predict_true(__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred))) {
276
277    int new_state = old_state + STATE_READER_COUNT_CHANGE_STEP;
278    if (__predict_false(!__state_owned_by_readers(new_state))) { // Happens when reader count overflows.
279      return EAGAIN;
280    }
281    if (__predict_true(atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state, new_state,
282                                              memory_order_acquire, memory_order_relaxed))) {
283      return 0;
284    }
285  }
286  return EBUSY;
287}
288
289static int __pthread_rwlock_timedrdlock(pthread_rwlock_internal_t* rwlock,
290                                        const timespec* abs_timeout_or_null) {
291
292  if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) == __get_thread()->tid) {
293    return EDEADLK;
294  }
295
296  while (true) {
297    int ret = __pthread_rwlock_tryrdlock(rwlock);
298    if (ret == 0 || ret == EAGAIN) {
299      return ret;
300    }
301
302    int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
303    if (__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred)) {
304      continue;
305    }
306
307    timespec ts;
308    timespec* rel_timeout = NULL;
309
310    if (abs_timeout_or_null != NULL) {
311      rel_timeout = &ts;
312      if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, CLOCK_REALTIME)) {
313        return ETIMEDOUT;
314      }
315    }
316
317    rwlock->pending_lock.lock();
318    rwlock->pending_reader_count++;
319
320    // We rely on the fact that all atomic exchange operations on the same object (here it is
321    // rwlock->state) always appear to occur in a single total order. If the pending flag is added
322    // before unlocking, the unlocking thread will wakeup the waiter. Otherwise, we will see the
323    // state is unlocked and will not wait anymore.
324    old_state = atomic_fetch_or_explicit(&rwlock->state, STATE_HAVE_PENDING_READERS_FLAG,
325                                         memory_order_relaxed);
326
327    int old_serial = rwlock->pending_reader_wakeup_serial;
328    rwlock->pending_lock.unlock();
329
330    int futex_ret = 0;
331    if (!__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred)) {
332      futex_ret = __futex_wait_ex(&rwlock->pending_reader_wakeup_serial, rwlock->pshared,
333                                  old_serial, rel_timeout);
334    }
335
336    rwlock->pending_lock.lock();
337    rwlock->pending_reader_count--;
338    if (rwlock->pending_reader_count == 0) {
339      atomic_fetch_and_explicit(&rwlock->state, ~STATE_HAVE_PENDING_READERS_FLAG,
340                                memory_order_relaxed);
341    }
342    rwlock->pending_lock.unlock();
343
344    if (futex_ret == -ETIMEDOUT) {
345      return ETIMEDOUT;
346    }
347  }
348}
349
350static inline __always_inline bool __can_acquire_write_lock(int old_state) {
351  return !__state_owned_by_readers_or_writer(old_state);
352}
353
354static inline __always_inline int __pthread_rwlock_trywrlock(pthread_rwlock_internal_t* rwlock) {
355  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
356
357  while (__predict_true(__can_acquire_write_lock(old_state))) {
358    if (__predict_true(atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state,
359          __state_add_writer_flag(old_state), memory_order_acquire, memory_order_relaxed))) {
360
361      atomic_store_explicit(&rwlock->writer_tid, __get_thread()->tid, memory_order_relaxed);
362      return 0;
363    }
364  }
365  return EBUSY;
366}
367
368static int __pthread_rwlock_timedwrlock(pthread_rwlock_internal_t* rwlock,
369                                        const timespec* abs_timeout_or_null) {
370
371  if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) == __get_thread()->tid) {
372    return EDEADLK;
373  }
374  while (true) {
375    int ret = __pthread_rwlock_trywrlock(rwlock);
376    if (ret == 0) {
377      return ret;
378    }
379
380    int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
381    if (__can_acquire_write_lock(old_state)) {
382      continue;
383    }
384
385    timespec ts;
386    timespec* rel_timeout = NULL;
387
388    if (abs_timeout_or_null != NULL) {
389      rel_timeout = &ts;
390      if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, CLOCK_REALTIME)) {
391        return ETIMEDOUT;
392      }
393    }
394
395    rwlock->pending_lock.lock();
396    rwlock->pending_writer_count++;
397
398    old_state = atomic_fetch_or_explicit(&rwlock->state, STATE_HAVE_PENDING_WRITERS_FLAG,
399                                         memory_order_relaxed);
400
401    int old_serial = rwlock->pending_writer_wakeup_serial;
402    rwlock->pending_lock.unlock();
403
404    int futex_ret = 0;
405    if (!__can_acquire_write_lock(old_state)) {
406      futex_ret = __futex_wait_ex(&rwlock->pending_writer_wakeup_serial, rwlock->pshared,
407                                  old_serial, rel_timeout);
408    }
409
410    rwlock->pending_lock.lock();
411    rwlock->pending_writer_count--;
412    if (rwlock->pending_writer_count == 0) {
413      atomic_fetch_and_explicit(&rwlock->state, ~STATE_HAVE_PENDING_WRITERS_FLAG,
414                                memory_order_relaxed);
415    }
416    rwlock->pending_lock.unlock();
417
418    if (futex_ret == -ETIMEDOUT) {
419      return ETIMEDOUT;
420    }
421  }
422}
423
424int pthread_rwlock_rdlock(pthread_rwlock_t* rwlock_interface) {
425  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
426  // Avoid slowing down fast path of rdlock.
427  if (__predict_true(__pthread_rwlock_tryrdlock(rwlock) == 0)) {
428    return 0;
429  }
430  return __pthread_rwlock_timedrdlock(rwlock, NULL);
431}
432
433int pthread_rwlock_timedrdlock(pthread_rwlock_t* rwlock_interface, const timespec* abs_timeout) {
434  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
435
436  return __pthread_rwlock_timedrdlock(rwlock, abs_timeout);
437}
438
439int pthread_rwlock_tryrdlock(pthread_rwlock_t* rwlock_interface) {
440  return __pthread_rwlock_tryrdlock(__get_internal_rwlock(rwlock_interface));
441}
442
443int pthread_rwlock_wrlock(pthread_rwlock_t* rwlock_interface) {
444  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
445  // Avoid slowing down fast path of wrlock.
446  if (__predict_true(__pthread_rwlock_trywrlock(rwlock) == 0)) {
447    return 0;
448  }
449  return __pthread_rwlock_timedwrlock(rwlock, NULL);
450}
451
452int pthread_rwlock_timedwrlock(pthread_rwlock_t* rwlock_interface, const timespec* abs_timeout) {
453  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
454
455  return __pthread_rwlock_timedwrlock(rwlock, abs_timeout);
456}
457
458int pthread_rwlock_trywrlock(pthread_rwlock_t* rwlock_interface) {
459  return __pthread_rwlock_trywrlock(__get_internal_rwlock(rwlock_interface));
460}
461
462int pthread_rwlock_unlock(pthread_rwlock_t* rwlock_interface) {
463  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
464
465  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
466  if (__state_owned_by_writer(old_state)) {
467    if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) != __get_thread()->tid) {
468      return EPERM;
469    }
470    atomic_store_explicit(&rwlock->writer_tid, 0, memory_order_relaxed);
471    old_state = atomic_fetch_and_explicit(&rwlock->state, ~STATE_OWNED_BY_WRITER_FLAG,
472                                          memory_order_release);
473    if (!__state_have_pending_readers_or_writers(old_state)) {
474      return 0;
475    }
476
477  } else if (__state_owned_by_readers(old_state)) {
478    old_state = atomic_fetch_sub_explicit(&rwlock->state, STATE_READER_COUNT_CHANGE_STEP,
479                                          memory_order_release);
480    if (!__state_is_last_reader(old_state) || !__state_have_pending_readers_or_writers(old_state)) {
481      return 0;
482    }
483
484  } else {
485    return EPERM;
486  }
487
488  // Wake up pending readers or writers.
489  rwlock->pending_lock.lock();
490  if (rwlock->pending_writer_count != 0) {
491    rwlock->pending_writer_wakeup_serial++;
492    rwlock->pending_lock.unlock();
493
494    __futex_wake_ex(&rwlock->pending_writer_wakeup_serial, rwlock->pshared, 1);
495
496  } else if (rwlock->pending_reader_count != 0) {
497    rwlock->pending_reader_wakeup_serial++;
498    rwlock->pending_lock.unlock();
499
500    __futex_wake_ex(&rwlock->pending_reader_wakeup_serial, rwlock->pshared, INT_MAX);
501
502  } else {
503    // It happens when waiters are woken up by timeout.
504    rwlock->pending_lock.unlock();
505  }
506  return 0;
507}
508