1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *  * Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 *  * Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in
12 *    the documentation and/or other materials provided with the
13 *    distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <errno.h>
30#include <stdatomic.h>
31#include <string.h>
32
33#include "pthread_internal.h"
34#include "private/bionic_futex.h"
35#include "private/bionic_lock.h"
36#include "private/bionic_time_conversions.h"
37
38/* Technical note:
39 *
40 * Possible states of a read/write lock:
41 *
42 *  - no readers and no writer (unlocked)
43 *  - one or more readers sharing the lock at the same time (read-locked)
44 *  - one writer holding the lock (write-lock)
45 *
46 * Additionally:
47 *  - trying to get the write-lock while there are any readers blocks
48 *  - trying to get the read-lock while there is a writer blocks
49 *  - a single thread can acquire the lock multiple times in read mode
50 *
51 *  - Posix states that behavior is undefined (may deadlock) if a thread tries
52 *    to acquire the lock
53 *      - in write mode while already holding the lock (whether in read or write mode)
54 *      - in read mode while already holding the lock in write mode.
55 *  - This implementation will return EDEADLK in "write after write" and "read after
56 *    write" cases and will deadlock in write after read case.
57 *
58 */
59
60// A rwlockattr is implemented as a 32-bit integer which has following fields:
61//  bits    name              description
62//   1     rwlock_kind       have rwlock preference like PTHREAD_RWLOCK_PREFER_READER_NP.
63//   0      process_shared    set to 1 if the rwlock is shared between processes.
64
65#define RWLOCKATTR_PSHARED_SHIFT 0
66#define RWLOCKATTR_KIND_SHIFT    1
67
68#define RWLOCKATTR_PSHARED_MASK  1
69#define RWLOCKATTR_KIND_MASK     2
70#define RWLOCKATTR_RESERVED_MASK (~3)
71
72static inline __always_inline __always_inline bool __rwlockattr_getpshared(const pthread_rwlockattr_t* attr) {
73  return (*attr & RWLOCKATTR_PSHARED_MASK) >> RWLOCKATTR_PSHARED_SHIFT;
74}
75
76static inline __always_inline __always_inline void __rwlockattr_setpshared(pthread_rwlockattr_t* attr, int pshared) {
77  *attr = (*attr & ~RWLOCKATTR_PSHARED_MASK) | (pshared << RWLOCKATTR_PSHARED_SHIFT);
78}
79
80static inline __always_inline int __rwlockattr_getkind(const pthread_rwlockattr_t* attr) {
81  return (*attr & RWLOCKATTR_KIND_MASK) >> RWLOCKATTR_KIND_SHIFT;
82}
83
84static inline __always_inline void __rwlockattr_setkind(pthread_rwlockattr_t* attr, int kind) {
85  *attr = (*attr & ~RWLOCKATTR_KIND_MASK) | (kind << RWLOCKATTR_KIND_SHIFT);
86}
87
88
89int pthread_rwlockattr_init(pthread_rwlockattr_t* attr) {
90  *attr = 0;
91  return 0;
92}
93
94int pthread_rwlockattr_destroy(pthread_rwlockattr_t* attr) {
95  *attr = -1;
96  return 0;
97}
98
99int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t* attr, int* pshared) {
100  if (__rwlockattr_getpshared(attr)) {
101    *pshared = PTHREAD_PROCESS_SHARED;
102  } else {
103    *pshared = PTHREAD_PROCESS_PRIVATE;
104  }
105  return 0;
106}
107
108int pthread_rwlockattr_setpshared(pthread_rwlockattr_t* attr, int pshared) {
109  switch (pshared) {
110    case PTHREAD_PROCESS_PRIVATE:
111      __rwlockattr_setpshared(attr, 0);
112      return 0;
113    case PTHREAD_PROCESS_SHARED:
114      __rwlockattr_setpshared(attr, 1);
115      return 0;
116    default:
117      return EINVAL;
118  }
119}
120
121int pthread_rwlockattr_getkind_np(const pthread_rwlockattr_t* attr, int* pref) {
122  *pref = __rwlockattr_getkind(attr);
123  return 0;
124}
125
126int pthread_rwlockattr_setkind_np(pthread_rwlockattr_t* attr, int pref) {
127  switch (pref) {
128    case PTHREAD_RWLOCK_PREFER_READER_NP:   // Fall through.
129    case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP:
130      __rwlockattr_setkind(attr, pref);
131      return 0;
132    default:
133      return EINVAL;
134  }
135}
136
137// A rwlock state is implemented as a 32-bit integer which has following rules:
138//  bits      name                              description
139//   31      owned_by_writer_flag              set to 1 if the lock is owned by a writer now.
140//  30-2     reader_count                      the count of readers holding the lock.
141//   1       have_pending_writers              set to 1 if having pending writers.
142//   0       have_pending_readers              set to 1 if having pending readers.
143
144#define STATE_HAVE_PENDING_READERS_SHIFT    0
145#define STATE_HAVE_PENDING_WRITERS_SHIFT    1
146#define STATE_READER_COUNT_SHIFT            2
147#define STATE_OWNED_BY_WRITER_SHIFT        31
148
149#define STATE_HAVE_PENDING_READERS_FLAG     (1 << STATE_HAVE_PENDING_READERS_SHIFT)
150#define STATE_HAVE_PENDING_WRITERS_FLAG     (1 << STATE_HAVE_PENDING_WRITERS_SHIFT)
151#define STATE_READER_COUNT_CHANGE_STEP  (1 << STATE_READER_COUNT_SHIFT)
152#define STATE_OWNED_BY_WRITER_FLAG      (1 << STATE_OWNED_BY_WRITER_SHIFT)
153
154#define STATE_HAVE_PENDING_READERS_OR_WRITERS_FLAG \
155          (STATE_HAVE_PENDING_READERS_FLAG | STATE_HAVE_PENDING_WRITERS_FLAG)
156
157struct pthread_rwlock_internal_t {
158  atomic_int state;
159  atomic_int writer_tid;
160
161  bool pshared;
162  bool writer_nonrecursive_preferred;
163  uint16_t __pad;
164
165// When a reader thread plans to suspend on the rwlock, it will add STATE_HAVE_PENDING_READERS_FLAG
166// in state, increase pending_reader_count, and wait on pending_reader_wakeup_serial. After woken
167// up, the reader thread decreases pending_reader_count, and the last pending reader thread should
168// remove STATE_HAVE_PENDING_READERS_FLAG in state. A pending writer thread works in a similar way,
169// except that it uses flag and members for writer threads.
170
171  Lock pending_lock;  // All pending members below are protected by pending_lock.
172  uint32_t pending_reader_count;  // Count of pending reader threads.
173  uint32_t pending_writer_count;  // Count of pending writer threads.
174  uint32_t pending_reader_wakeup_serial;  // Pending reader threads wait on this address by futex_wait.
175  uint32_t pending_writer_wakeup_serial;  // Pending writer threads wait on this address by futex_wait.
176
177#if defined(__LP64__)
178  char __reserved[20];
179#else
180  char __reserved[4];
181#endif
182};
183
184static inline __always_inline bool __state_owned_by_writer(int state) {
185  return state < 0;
186}
187
188static inline __always_inline bool __state_owned_by_readers(int state) {
189  // If state >= 0, the owned_by_writer_flag is not set.
190  // And if state >= STATE_READER_COUNT_CHANGE_STEP, the reader_count field is not empty.
191  return state >= STATE_READER_COUNT_CHANGE_STEP;
192}
193
194static inline __always_inline bool __state_owned_by_readers_or_writer(int state) {
195  return state < 0 || state >= STATE_READER_COUNT_CHANGE_STEP;
196}
197
198static inline __always_inline int __state_add_writer_flag(int state) {
199  return state | STATE_OWNED_BY_WRITER_FLAG;
200}
201
202static inline __always_inline bool __state_is_last_reader(int state) {
203  return (state >> STATE_READER_COUNT_SHIFT) == 1;
204}
205
206static inline __always_inline bool __state_have_pending_writers(int state) {
207  return state & STATE_HAVE_PENDING_WRITERS_FLAG;
208}
209
210static inline __always_inline bool __state_have_pending_readers_or_writers(int state) {
211  return state & STATE_HAVE_PENDING_READERS_OR_WRITERS_FLAG;
212}
213
214static_assert(sizeof(pthread_rwlock_t) == sizeof(pthread_rwlock_internal_t),
215              "pthread_rwlock_t should actually be pthread_rwlock_internal_t in implementation.");
216
217// For binary compatibility with old version of pthread_rwlock_t, we can't use more strict
218// alignment than 4-byte alignment.
219static_assert(alignof(pthread_rwlock_t) == 4,
220             "pthread_rwlock_t should fulfill the alignment requirement of pthread_rwlock_internal_t.");
221
222static inline __always_inline pthread_rwlock_internal_t* __get_internal_rwlock(pthread_rwlock_t* rwlock_interface) {
223  return reinterpret_cast<pthread_rwlock_internal_t*>(rwlock_interface);
224}
225
226int pthread_rwlock_init(pthread_rwlock_t* rwlock_interface, const pthread_rwlockattr_t* attr) {
227  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
228
229  memset(rwlock, 0, sizeof(pthread_rwlock_internal_t));
230
231  if (__predict_false(attr != NULL)) {
232    rwlock->pshared = __rwlockattr_getpshared(attr);
233    int kind = __rwlockattr_getkind(attr);
234    switch (kind) {
235      case PTHREAD_RWLOCK_PREFER_READER_NP:
236        rwlock->writer_nonrecursive_preferred = false;
237        break;
238      case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP:
239        rwlock->writer_nonrecursive_preferred = true;
240        break;
241      default:
242        return EINVAL;
243    }
244    if ((*attr & RWLOCKATTR_RESERVED_MASK) != 0) {
245      return EINVAL;
246    }
247  }
248
249  atomic_init(&rwlock->state, 0);
250  rwlock->pending_lock.init(rwlock->pshared);
251  return 0;
252}
253
254int pthread_rwlock_destroy(pthread_rwlock_t* rwlock_interface) {
255  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
256
257  if (atomic_load_explicit(&rwlock->state, memory_order_relaxed) != 0) {
258    return EBUSY;
259  }
260  return 0;
261}
262
263static inline __always_inline bool __can_acquire_read_lock(int old_state,
264                                                             bool writer_nonrecursive_preferred) {
265  // If writer is preferred with nonrecursive reader, we prevent further readers from acquiring
266  // the lock when there are writers waiting for the lock.
267  bool cannot_apply = __state_owned_by_writer(old_state) ||
268                      (writer_nonrecursive_preferred && __state_have_pending_writers(old_state));
269  return !cannot_apply;
270}
271
272static inline __always_inline int __pthread_rwlock_tryrdlock(pthread_rwlock_internal_t* rwlock) {
273  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
274
275  while (__predict_true(__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred))) {
276
277    int new_state = old_state + STATE_READER_COUNT_CHANGE_STEP;
278    if (__predict_false(!__state_owned_by_readers(new_state))) { // Happens when reader count overflows.
279      return EAGAIN;
280    }
281    if (__predict_true(atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state, new_state,
282                                              memory_order_acquire, memory_order_relaxed))) {
283      return 0;
284    }
285  }
286  return EBUSY;
287}
288
289static int __pthread_rwlock_timedrdlock(pthread_rwlock_internal_t* rwlock,
290                                        const timespec* abs_timeout_or_null) {
291
292  if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) == __get_thread()->tid) {
293    return EDEADLK;
294  }
295
296  while (true) {
297    int result = __pthread_rwlock_tryrdlock(rwlock);
298    if (result == 0 || result == EAGAIN) {
299      return result;
300    }
301    result = check_timespec(abs_timeout_or_null, true);
302    if (result != 0) {
303      return result;
304    }
305
306    int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
307    if (__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred)) {
308      continue;
309    }
310
311    rwlock->pending_lock.lock();
312    rwlock->pending_reader_count++;
313
314    // We rely on the fact that all atomic exchange operations on the same object (here it is
315    // rwlock->state) always appear to occur in a single total order. If the pending flag is added
316    // before unlocking, the unlocking thread will wakeup the waiter. Otherwise, we will see the
317    // state is unlocked and will not wait anymore.
318    old_state = atomic_fetch_or_explicit(&rwlock->state, STATE_HAVE_PENDING_READERS_FLAG,
319                                         memory_order_relaxed);
320
321    int old_serial = rwlock->pending_reader_wakeup_serial;
322    rwlock->pending_lock.unlock();
323
324    int futex_result = 0;
325    if (!__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred)) {
326      futex_result = __futex_wait_ex(&rwlock->pending_reader_wakeup_serial, rwlock->pshared,
327                                  old_serial, true, abs_timeout_or_null);
328    }
329
330    rwlock->pending_lock.lock();
331    rwlock->pending_reader_count--;
332    if (rwlock->pending_reader_count == 0) {
333      atomic_fetch_and_explicit(&rwlock->state, ~STATE_HAVE_PENDING_READERS_FLAG,
334                                memory_order_relaxed);
335    }
336    rwlock->pending_lock.unlock();
337
338    if (futex_result == -ETIMEDOUT) {
339      return ETIMEDOUT;
340    }
341  }
342}
343
344static inline __always_inline bool __can_acquire_write_lock(int old_state) {
345  return !__state_owned_by_readers_or_writer(old_state);
346}
347
348static inline __always_inline int __pthread_rwlock_trywrlock(pthread_rwlock_internal_t* rwlock) {
349  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
350
351  while (__predict_true(__can_acquire_write_lock(old_state))) {
352    if (__predict_true(atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state,
353          __state_add_writer_flag(old_state), memory_order_acquire, memory_order_relaxed))) {
354
355      atomic_store_explicit(&rwlock->writer_tid, __get_thread()->tid, memory_order_relaxed);
356      return 0;
357    }
358  }
359  return EBUSY;
360}
361
362static int __pthread_rwlock_timedwrlock(pthread_rwlock_internal_t* rwlock,
363                                        const timespec* abs_timeout_or_null) {
364
365  if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) == __get_thread()->tid) {
366    return EDEADLK;
367  }
368  while (true) {
369    int result = __pthread_rwlock_trywrlock(rwlock);
370    if (result == 0) {
371      return result;
372    }
373    result = check_timespec(abs_timeout_or_null, true);
374    if (result != 0) {
375      return result;
376    }
377
378    int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
379    if (__can_acquire_write_lock(old_state)) {
380      continue;
381    }
382
383    rwlock->pending_lock.lock();
384    rwlock->pending_writer_count++;
385
386    old_state = atomic_fetch_or_explicit(&rwlock->state, STATE_HAVE_PENDING_WRITERS_FLAG,
387                                         memory_order_relaxed);
388
389    int old_serial = rwlock->pending_writer_wakeup_serial;
390    rwlock->pending_lock.unlock();
391
392    int futex_result = 0;
393    if (!__can_acquire_write_lock(old_state)) {
394      futex_result = __futex_wait_ex(&rwlock->pending_writer_wakeup_serial, rwlock->pshared,
395                                  old_serial, true, abs_timeout_or_null);
396    }
397
398    rwlock->pending_lock.lock();
399    rwlock->pending_writer_count--;
400    if (rwlock->pending_writer_count == 0) {
401      atomic_fetch_and_explicit(&rwlock->state, ~STATE_HAVE_PENDING_WRITERS_FLAG,
402                                memory_order_relaxed);
403    }
404    rwlock->pending_lock.unlock();
405
406    if (futex_result == -ETIMEDOUT) {
407      return ETIMEDOUT;
408    }
409  }
410}
411
412int pthread_rwlock_rdlock(pthread_rwlock_t* rwlock_interface) {
413  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
414  // Avoid slowing down fast path of rdlock.
415  if (__predict_true(__pthread_rwlock_tryrdlock(rwlock) == 0)) {
416    return 0;
417  }
418  return __pthread_rwlock_timedrdlock(rwlock, nullptr);
419}
420
421int pthread_rwlock_timedrdlock(pthread_rwlock_t* rwlock_interface, const timespec* abs_timeout) {
422  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
423
424  return __pthread_rwlock_timedrdlock(rwlock, abs_timeout);
425}
426
427int pthread_rwlock_tryrdlock(pthread_rwlock_t* rwlock_interface) {
428  return __pthread_rwlock_tryrdlock(__get_internal_rwlock(rwlock_interface));
429}
430
431int pthread_rwlock_wrlock(pthread_rwlock_t* rwlock_interface) {
432  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
433  // Avoid slowing down fast path of wrlock.
434  if (__predict_true(__pthread_rwlock_trywrlock(rwlock) == 0)) {
435    return 0;
436  }
437  return __pthread_rwlock_timedwrlock(rwlock, nullptr);
438}
439
440int pthread_rwlock_timedwrlock(pthread_rwlock_t* rwlock_interface, const timespec* abs_timeout) {
441  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
442
443  return __pthread_rwlock_timedwrlock(rwlock, abs_timeout);
444}
445
446int pthread_rwlock_trywrlock(pthread_rwlock_t* rwlock_interface) {
447  return __pthread_rwlock_trywrlock(__get_internal_rwlock(rwlock_interface));
448}
449
450int pthread_rwlock_unlock(pthread_rwlock_t* rwlock_interface) {
451  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);
452
453  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
454  if (__state_owned_by_writer(old_state)) {
455    if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) != __get_thread()->tid) {
456      return EPERM;
457    }
458    atomic_store_explicit(&rwlock->writer_tid, 0, memory_order_relaxed);
459    old_state = atomic_fetch_and_explicit(&rwlock->state, ~STATE_OWNED_BY_WRITER_FLAG,
460                                          memory_order_release);
461    if (!__state_have_pending_readers_or_writers(old_state)) {
462      return 0;
463    }
464
465  } else if (__state_owned_by_readers(old_state)) {
466    old_state = atomic_fetch_sub_explicit(&rwlock->state, STATE_READER_COUNT_CHANGE_STEP,
467                                          memory_order_release);
468    if (!__state_is_last_reader(old_state) || !__state_have_pending_readers_or_writers(old_state)) {
469      return 0;
470    }
471
472  } else {
473    return EPERM;
474  }
475
476  // Wake up pending readers or writers.
477  rwlock->pending_lock.lock();
478  if (rwlock->pending_writer_count != 0) {
479    rwlock->pending_writer_wakeup_serial++;
480    rwlock->pending_lock.unlock();
481
482    __futex_wake_ex(&rwlock->pending_writer_wakeup_serial, rwlock->pshared, 1);
483
484  } else if (rwlock->pending_reader_count != 0) {
485    rwlock->pending_reader_wakeup_serial++;
486    rwlock->pending_lock.unlock();
487
488    __futex_wake_ex(&rwlock->pending_reader_wakeup_serial, rwlock->pshared, INT_MAX);
489
490  } else {
491    // It happens when waiters are woken up by timeout.
492    rwlock->pending_lock.unlock();
493  }
494  return 0;
495}
496