1663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
2663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--------------------------------------------------------------------*/
3663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- Linux ticket lock implementation         ticket-lock-linux.c ---*/
4663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*---                                                              ---*/
5663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- Guarantees fair scheduling even if multiple threads are      ---*/
6663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- runnable at the same time on a multicore system. Has been    ---*/
7663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- observed to cause a slow-down compared to the generic        ---*/
8663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- scheduler lock with CPU frequency scaling enabled. Makes     ---*/
9663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- Valgrind slightly faster if CPU frequency scaling has been   ---*/
10663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- disabled. See also http://bugs.kde.org/show_bug.cgi?id=270006---*/
11663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--------------------------------------------------------------------*/
12663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
13663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*
14663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   This file is part of Valgrind, a dynamic binary instrumentation
15663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   framework.
16663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
17436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   Copyright (C) 2011-2013 Bart Van Assche <bvanassche@acm.org>.
18663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
19663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   This program is free software; you can redistribute it and/or
20663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   modify it under the terms of the GNU General Public License as
21663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   published by the Free Software Foundation; either version 2 of the
22663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   License, or (at your option) any later version.
23663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
24663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   This program is distributed in the hope that it will be useful, but
25663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   WITHOUT ANY WARRANTY; without even the implied warranty of
26663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   General Public License for more details.
28663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
29663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   You should have received a copy of the GNU General Public License
30663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   along with this program; if not, write to the Free Software
31663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
32663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   02111-1307, USA.
33663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
34663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   The GNU General Public License is contained in the file COPYING.
35663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng*/
36663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
37663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_basics.h"
38663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_libcassert.h"
39663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_libcbase.h"     // VG_(memset)()
40663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_libcprint.h"
41663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_syscall.h"
42663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_vki.h"
43663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_vkiscnums.h"    // __NR_futex
44436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include "pub_core_libcproc.h"
45436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include "pub_core_mallocfree.h"
46436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include "pub_core_threadstate.h"
47436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include "pub_core_inner.h"
48663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#if defined(ENABLE_INNER_CLIENT_REQUEST)
49663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "helgrind/helgrind.h"
50663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#endif
51663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "priv_sched-lock.h"
52663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "priv_sched-lock-impl.h"
53663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
54663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#define TL_FUTEX_COUNT_LOG2 4
55663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#define TL_FUTEX_COUNT (1U << TL_FUTEX_COUNT_LOG2)
56663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#define TL_FUTEX_MASK (TL_FUTEX_COUNT - 1)
57663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
58663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstruct sched_lock {
59663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   volatile unsigned head;
60663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   volatile unsigned tail;
61663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   volatile unsigned futex[TL_FUTEX_COUNT];
62663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   int owner;
63663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng};
64663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
65663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#if 1
66663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic Bool s_debug;
67663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#else
68663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic Bool s_debug = True;
69663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#endif
70663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
71436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovstatic const HChar *get_sched_lock_name(void)
72663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{
73663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   return "ticket lock";
74663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng}
75663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
76663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic struct sched_lock *create_sched_lock(void)
77663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{
78663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   struct sched_lock *p;
79663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
80663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   p = VG_(malloc)("sched_lock", sizeof(*p));
81663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   if (p) {
82663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // The futex syscall requires that a futex takes four bytes.
83663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      vg_assert(sizeof(p->futex[0]) == 4);
84663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
85663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      p->head = 0;
86663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      p->tail = 0;
87663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      VG_(memset)((void*)p->futex, 0, sizeof(p->futex));
88663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      p->owner = 0;
89663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   }
90663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   INNER_REQUEST(ANNOTATE_RWLOCK_CREATE(p));
91663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   INNER_REQUEST(ANNOTATE_BENIGN_RACE_SIZED(&p->futex, sizeof(p->futex), ""));
92663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   return p;
93663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng}
94663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
95663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic void destroy_sched_lock(struct sched_lock *p)
96663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{
97663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   INNER_REQUEST(ANNOTATE_RWLOCK_DESTROY(p));
98663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   VG_(free)(p);
99663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng}
100663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
101663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic int get_sched_lock_owner(struct sched_lock *p)
102663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{
103663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   return p->owner;
104663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng}
105663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
106663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*
107663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * Acquire ticket lock. Increment the tail of the queue and use the original
108663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * value as the ticket value. Wait until the head of the queue equals the
109663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * ticket value. The futex used to wait depends on the ticket value in order
110663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * to avoid that all threads get woken up every time a ticket lock is
111663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * released. That last effect is sometimes called the "thundering herd"
112663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * effect.
113663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng *
114663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * See also Nick Piggin, x86: FIFO ticket spinlocks, Linux kernel mailing list
115663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * (http://lkml.org/lkml/2007/11/1/125) for more info.
116663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng */
117663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic void acquire_sched_lock(struct sched_lock *p)
118663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{
119663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   unsigned ticket, futex_value;
120663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   volatile unsigned *futex;
121663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   SysRes sres;
122663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
123663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   ticket = __sync_fetch_and_add(&p->tail, 1);
124663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   futex = &p->futex[ticket & TL_FUTEX_MASK];
125663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   if (s_debug)
126663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      VG_(printf)("[%d/%d] acquire: ticket %d\n", VG_(getpid)(),
127663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                  VG_(gettid)(), ticket);
128663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   for (;;) {
129663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      futex_value = *futex;
130663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      __sync_synchronize();
131663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      if (ticket == p->head)
132663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         break;
133663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      if (s_debug)
134663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         VG_(printf)("[%d/%d] acquire: ticket %d - waiting until"
135663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                     " futex[%ld] != %d\n", VG_(getpid)(),
136663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                     VG_(gettid)(), ticket, (long)(futex - p->futex),
137663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                     futex_value);
138663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      sres = VG_(do_syscall3)(__NR_futex, (UWord)futex,
139663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                              VKI_FUTEX_WAIT | VKI_FUTEX_PRIVATE_FLAG,
140663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                              futex_value);
141663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      if (sr_isError(sres) && sres._val != VKI_EAGAIN) {
142663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         VG_(printf)("futex_wait() returned error code %ld\n", sres._val);
143663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         vg_assert(False);
144663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      }
145663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   }
146663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   __sync_synchronize();
147663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   INNER_REQUEST(ANNOTATE_RWLOCK_ACQUIRED(p, /*is_w*/1));
148663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   vg_assert(p->owner == 0);
149663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   p->owner = VG_(gettid)();
150663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng}
151663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
152663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*
153663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * Release a ticket lock by incrementing the head of the queue. Only generate
154663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * a thread wakeup signal if at least one thread is waiting. If the queue tail
155663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * matches the wakeup_ticket value, no threads have to be woken up.
156663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng *
157663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * Note: tail will only be read after head has been incremented since both are
158663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * declared as volatile and since the __sync...() functions imply a memory
159663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * barrier.
160663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng */
161663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic void release_sched_lock(struct sched_lock *p)
162663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{
163663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   unsigned wakeup_ticket, futex_value;
164663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   volatile unsigned *futex;
165663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   SysRes sres;
166663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
167663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   vg_assert(p->owner != 0);
168663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   p->owner = 0;
169663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   INNER_REQUEST(ANNOTATE_RWLOCK_RELEASED(p, /*is_w*/1));
170663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   wakeup_ticket = __sync_fetch_and_add(&p->head, 1) + 1;
171663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   if (p->tail != wakeup_ticket) {
172663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      futex = &p->futex[wakeup_ticket & TL_FUTEX_MASK];
173663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      futex_value = __sync_fetch_and_add(futex, 1);
174663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      if (s_debug)
175663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         VG_(printf)("[%d/%d] release: waking up ticket %d (futex[%ld] = %d)"
176663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                     "\n", VG_(getpid)(), VG_(gettid)(), wakeup_ticket,
177663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                     (long)(futex - p->futex), futex_value);
178663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      sres = VG_(do_syscall3)(__NR_futex, (UWord)futex,
179663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                              VKI_FUTEX_WAKE | VKI_FUTEX_PRIVATE_FLAG,
180663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                              0x7fffffff);
181663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      vg_assert(!sr_isError(sres));
182663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   } else {
183663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      if (s_debug)
184663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         VG_(printf)("[%d/%d] release: no thread is waiting for ticket %d\n",
185663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                     VG_(getpid)(), VG_(gettid)(), wakeup_ticket);
186663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   }
187663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng}
188663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
189663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengconst struct sched_lock_ops ML_(linux_ticket_lock_ops) = {
190663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   .get_sched_lock_name  = get_sched_lock_name,
191663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   .create_sched_lock    = create_sched_lock,
192663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   .destroy_sched_lock   = destroy_sched_lock,
193663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   .get_sched_lock_owner = get_sched_lock_owner,
194663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   .acquire_sched_lock   = acquire_sched_lock,
195663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   .release_sched_lock   = release_sched_lock,
196663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng};
197