178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--------------------------------------------------------------------*/
378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- Linux ticket lock implementation         ticket-lock-linux.c ---*/
478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*---                                                              ---*/
578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- Guarantees fair scheduling even if multiple threads are      ---*/
678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- runnable at the same time on a multicore system. Has been    ---*/
778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- observed to cause a slow-down compared to the generic        ---*/
878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- scheduler lock with CPU frequency scaling enabled. Makes     ---*/
978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- Valgrind slightly faster if CPU frequency scaling has been   ---*/
1078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- disabled. See also http://bugs.kde.org/show_bug.cgi?id=270006---*/
1178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--------------------------------------------------------------------*/
1278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
1378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*
1478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   This file is part of Valgrind, a dynamic binary instrumentation
1578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   framework.
1678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
17ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   Copyright (C) 2011-2017 Bart Van Assche <bvanassche@acm.org>.
1878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
1978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   This program is free software; you can redistribute it and/or
2078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   modify it under the terms of the GNU General Public License as
2178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   published by the Free Software Foundation; either version 2 of the
2278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   License, or (at your option) any later version.
2378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
2478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   This program is distributed in the hope that it will be useful, but
2578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   WITHOUT ANY WARRANTY; without even the implied warranty of
2678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
2778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   General Public License for more details.
2878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
2978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   You should have received a copy of the GNU General Public License
3078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   along with this program; if not, write to the Free Software
3178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
3278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   02111-1307, USA.
3378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
3478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   The GNU General Public License is contained in the file COPYING.
3578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart*/
3678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
3778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_basics.h"
3878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_libcassert.h"
3978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_libcbase.h"     // VG_(memset)()
4078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_libcprint.h"
4178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_syscall.h"
4278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_vki.h"
4378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_vkiscnums.h"    // __NR_futex
44c91f58449e6fc2a4ce0851639a342c4277612fbbflorian#include "pub_core_libcproc.h"
45c91f58449e6fc2a4ce0851639a342c4277612fbbflorian#include "pub_core_mallocfree.h"
46c91f58449e6fc2a4ce0851639a342c4277612fbbflorian#include "pub_core_threadstate.h"
47c91f58449e6fc2a4ce0851639a342c4277612fbbflorian#include "pub_core_inner.h"
48277eafff8cbea7292851b4bf4d5b6c18385578a0philippe#if defined(ENABLE_INNER_CLIENT_REQUEST)
49277eafff8cbea7292851b4bf4d5b6c18385578a0philippe#include "helgrind/helgrind.h"
50277eafff8cbea7292851b4bf4d5b6c18385578a0philippe#endif
5178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "priv_sched-lock.h"
5278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "priv_sched-lock-impl.h"
5378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
5478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#define TL_FUTEX_COUNT_LOG2 4
5578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#define TL_FUTEX_COUNT (1U << TL_FUTEX_COUNT_LOG2)
5678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#define TL_FUTEX_MASK (TL_FUTEX_COUNT - 1)
5778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
5878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstruct sched_lock {
5978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   volatile unsigned head;
6078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   volatile unsigned tail;
6178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   volatile unsigned futex[TL_FUTEX_COUNT];
6278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   int owner;
6378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart};
6478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
6578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#if 1
6678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic Bool s_debug;
6778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#else
6878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic Bool s_debug = True;
6978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#endif
7078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
71cd19e99a2fc1adf5a142664d9604d18b51b646ecflorianstatic const HChar *get_sched_lock_name(void)
7278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart{
7378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   return "ticket lock";
7478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart}
7578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
7678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic struct sched_lock *create_sched_lock(void)
7778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart{
7878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   struct sched_lock *p;
7978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
8078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   p = VG_(malloc)("sched_lock", sizeof(*p));
8168790a73bcb290746a5b34c44538c3b2728eaaecflorian
8268790a73bcb290746a5b34c44538c3b2728eaaecflorian   // The futex syscall requires that a futex takes four bytes.
8368790a73bcb290746a5b34c44538c3b2728eaaecflorian   vg_assert(sizeof(p->futex[0]) == 4);
8468790a73bcb290746a5b34c44538c3b2728eaaecflorian
852857285fdec67e0b464104d7f35bf3cd26a10258florian   VG_(memset)(p, 0, sizeof(*p));
8668790a73bcb290746a5b34c44538c3b2728eaaecflorian
87277eafff8cbea7292851b4bf4d5b6c18385578a0philippe   INNER_REQUEST(ANNOTATE_RWLOCK_CREATE(p));
883f6e9d99048d516266b6d8928a159785771eb054bart   INNER_REQUEST(ANNOTATE_BENIGN_RACE_SIZED(&p->futex, sizeof(p->futex), ""));
8978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   return p;
9078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart}
9178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
9278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic void destroy_sched_lock(struct sched_lock *p)
9378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart{
94277eafff8cbea7292851b4bf4d5b6c18385578a0philippe   INNER_REQUEST(ANNOTATE_RWLOCK_DESTROY(p));
9578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   VG_(free)(p);
9678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart}
9778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
9878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic int get_sched_lock_owner(struct sched_lock *p)
9978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart{
10078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   return p->owner;
10178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart}
10278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
10378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*
10478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * Acquire ticket lock. Increment the tail of the queue and use the original
10578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * value as the ticket value. Wait until the head of the queue equals the
10678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * ticket value. The futex used to wait depends on the ticket value in order
10778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * to avoid that all threads get woken up every time a ticket lock is
10878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * released. That last effect is sometimes called the "thundering herd"
10978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * effect.
11078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart *
11178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * See also Nick Piggin, x86: FIFO ticket spinlocks, Linux kernel mailing list
11278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * (http://lkml.org/lkml/2007/11/1/125) for more info.
11378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart */
11478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic void acquire_sched_lock(struct sched_lock *p)
11578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart{
11678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   unsigned ticket, futex_value;
11778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   volatile unsigned *futex;
11878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   SysRes sres;
11978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
12078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   ticket = __sync_fetch_and_add(&p->tail, 1);
12178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   futex = &p->futex[ticket & TL_FUTEX_MASK];
12278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   if (s_debug)
123c6e5d76e9eea8625f385ff844545c688c91938daflorian      VG_(printf)("[%d/%d] acquire: ticket %u\n", VG_(getpid)(),
12478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart                  VG_(gettid)(), ticket);
12578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   for (;;) {
12678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart      futex_value = *futex;
1270c30a7b6e2ad70dff1924d851ba94f73d309b11abart      __sync_synchronize();
12878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart      if (ticket == p->head)
12978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart         break;
13078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart      if (s_debug)
131c6e5d76e9eea8625f385ff844545c688c91938daflorian         VG_(printf)("[%d/%d] acquire: ticket %u - waiting until"
132c6e5d76e9eea8625f385ff844545c688c91938daflorian                     " futex[%ld] != %u\n", VG_(getpid)(),
13378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart                     VG_(gettid)(), ticket, (long)(futex - p->futex),
13478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart                     futex_value);
13578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart      sres = VG_(do_syscall3)(__NR_futex, (UWord)futex,
13678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart                              VKI_FUTEX_WAIT | VKI_FUTEX_PRIVATE_FLAG,
13778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart                              futex_value);
138bd664fdd6b51670786bf058ae23e3f79a9758487sewardj      if (sr_isError(sres) && sr_Err(sres) != VKI_EAGAIN) {
139c6e5d76e9eea8625f385ff844545c688c91938daflorian         VG_(printf)("futex_wait() returned error code %lu\n", sr_Err(sres));
14078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart         vg_assert(False);
14178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart      }
14278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   }
1430c30a7b6e2ad70dff1924d851ba94f73d309b11abart   __sync_synchronize();
144277eafff8cbea7292851b4bf4d5b6c18385578a0philippe   INNER_REQUEST(ANNOTATE_RWLOCK_ACQUIRED(p, /*is_w*/1));
14578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   vg_assert(p->owner == 0);
14678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   p->owner = VG_(gettid)();
14778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart}
14878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
14978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*
15078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * Release a ticket lock by incrementing the head of the queue. Only generate
15178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * a thread wakeup signal if at least one thread is waiting. If the queue tail
15278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * matches the wakeup_ticket value, no threads have to be woken up.
15378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart *
15478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * Note: tail will only be read after head has been incremented since both are
15578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * declared as volatile and since the __sync...() functions imply a memory
15678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * barrier.
15778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart */
15878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic void release_sched_lock(struct sched_lock *p)
15978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart{
16078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   unsigned wakeup_ticket, futex_value;
16178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   volatile unsigned *futex;
16278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   SysRes sres;
16378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
16478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   vg_assert(p->owner != 0);
16578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   p->owner = 0;
166277eafff8cbea7292851b4bf4d5b6c18385578a0philippe   INNER_REQUEST(ANNOTATE_RWLOCK_RELEASED(p, /*is_w*/1));
16778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   wakeup_ticket = __sync_fetch_and_add(&p->head, 1) + 1;
16878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   if (p->tail != wakeup_ticket) {
16978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart      futex = &p->futex[wakeup_ticket & TL_FUTEX_MASK];
17078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart      futex_value = __sync_fetch_and_add(futex, 1);
17178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart      if (s_debug)
172c6e5d76e9eea8625f385ff844545c688c91938daflorian         VG_(printf)("[%d/%d] release: waking up ticket %u (futex[%ld] = %u)"
17378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart                     "\n", VG_(getpid)(), VG_(gettid)(), wakeup_ticket,
17478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart                     (long)(futex - p->futex), futex_value);
17578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart      sres = VG_(do_syscall3)(__NR_futex, (UWord)futex,
17678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart                              VKI_FUTEX_WAKE | VKI_FUTEX_PRIVATE_FLAG,
17778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart                              0x7fffffff);
17878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart      vg_assert(!sr_isError(sres));
17978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   } else {
18078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart      if (s_debug)
181c6e5d76e9eea8625f385ff844545c688c91938daflorian         VG_(printf)("[%d/%d] release: no thread is waiting for ticket %u\n",
18278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart                     VG_(getpid)(), VG_(gettid)(), wakeup_ticket);
18378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   }
18478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart}
18578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart
18678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartconst struct sched_lock_ops ML_(linux_ticket_lock_ops) = {
18778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   .get_sched_lock_name  = get_sched_lock_name,
18878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   .create_sched_lock    = create_sched_lock,
18978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   .destroy_sched_lock   = destroy_sched_lock,
19078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   .get_sched_lock_owner = get_sched_lock_owner,
19178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   .acquire_sched_lock   = acquire_sched_lock,
19278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart   .release_sched_lock   = release_sched_lock,
19378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart};
194