/*--------------------------------------------------------------------*/
/*--- Linux ticket lock implementation         ticket-lock-linux.c ---*/
/*---                                                              ---*/
/*--- Guarantees fair scheduling even if multiple threads are      ---*/
/*--- runnable at the same time on a multicore system. Has been    ---*/
/*--- observed to cause a slow-down compared to the generic        ---*/
/*--- scheduler lock with CPU frequency scaling enabled. Makes     ---*/
/*--- Valgrind slightly faster if CPU frequency scaling has been   ---*/
/*--- disabled. See also http://bugs.kde.org/show_bug.cgi?id=270006---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2011-2013 Bart Van Assche <bvanassche@acm.org>.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"     // VG_(memset)()
#include "pub_core_libcprint.h"
#include "pub_core_syscall.h"
#include "pub_core_vki.h"
#include "pub_core_vkiscnums.h"    // __NR_futex
#include "pub_core_libcproc.h"
#include "pub_core_mallocfree.h"
#include "pub_core_threadstate.h"
#include "pub_core_inner.h"
#if defined(ENABLE_INNER_CLIENT_REQUEST)
#include "helgrind/helgrind.h"
#endif
#include "priv_sched-lock.h"
#include "priv_sched-lock-impl.h"

#define TL_FUTEX_COUNT_LOG2 4
#define TL_FUTEX_COUNT (1U << TL_FUTEX_COUNT_LOG2)
#define TL_FUTEX_MASK (TL_FUTEX_COUNT - 1)
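
/*
 * Waiters are distributed over TL_FUTEX_COUNT futex words rather than a
 * single one: a thread holding ticket t sleeps on futex[t & TL_FUTEX_MASK].
 * With TL_FUTEX_COUNT_LOG2 == 4 that gives 16 wait channels, so tickets
 * 0, 16, 32, ... share futex[0], tickets 1, 17, 33, ... share futex[1], etc.
 * (Comment added for clarity; see acquire_sched_lock() below.)
 */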

struct sched_lock {
   volatile unsigned head;                  // Ticket now allowed to run.
   volatile unsigned tail;                  // Next ticket to hand out.
   volatile unsigned futex[TL_FUTEX_COUNT]; // Wait channels (see above).
   int owner;                               // Tid of lock holder, or 0.
};

#if 1
static Bool s_debug;
#else
static Bool s_debug = True;
#endif

static const HChar *get_sched_lock_name(void)
{
   return "ticket lock";
}

static struct sched_lock *create_sched_lock(void)
{
   struct sched_lock *p;

   p = VG_(malloc)("sched_lock", sizeof(*p));
   if (p) {
      // The futex syscall requires that a futex takes four bytes.
      vg_assert(sizeof(p->futex[0]) == 4);

      p->head = 0;
      p->tail = 0;
      VG_(memset)((void*)p->futex, 0, sizeof(p->futex));
      p->owner = 0;
   }
   INNER_REQUEST(ANNOTATE_RWLOCK_CREATE(p));
   INNER_REQUEST(ANNOTATE_BENIGN_RACE_SIZED(&p->futex, sizeof(p->futex), ""));
   return p;
}

static void destroy_sched_lock(struct sched_lock *p)
{
   INNER_REQUEST(ANNOTATE_RWLOCK_DESTROY(p));
   VG_(free)(p);
}

static int get_sched_lock_owner(struct sched_lock *p)
{
   return p->owner;
}

/*
 * Acquire the ticket lock. Increment the tail of the queue and use the
 * original value as the ticket value. Wait until the head of the queue
 * equals the ticket value. The futex used for waiting depends on the ticket
 * value so that not every waiting thread is woken up each time the ticket
 * lock is released; waking them all would be an instance of the "thundering
 * herd" effect.
 *
 * See also Nick Piggin, x86: FIFO ticket spinlocks, Linux kernel mailing list
 * (http://lkml.org/lkml/2007/11/1/125) for more information.
 */
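/*
 * Worked example (comment added for clarity): with head == tail == 0,
 * thread A takes ticket 0, sees ticket == head and enters the critical
 * section, while thread B takes ticket 1 and sleeps on futex[1]. When A
 * releases the lock, head becomes 1, futex[1] is incremented and its
 * waiters are woken; B then observes ticket == head and proceeds.
 */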
static void acquire_sched_lock(struct sched_lock *p)
{
   unsigned ticket, futex_value;
   volatile unsigned *futex;
   SysRes sres;

   ticket = __sync_fetch_and_add(&p->tail, 1);
   futex = &p->futex[ticket & TL_FUTEX_MASK];
   if (s_debug)
      VG_(printf)("[%d/%d] acquire: ticket %d\n", VG_(getpid)(),
                  VG_(gettid)(), ticket);
   for (;;) {
      futex_value = *futex;
      __sync_synchronize();
      if (ticket == p->head)
         break;
      if (s_debug)
         VG_(printf)("[%d/%d] acquire: ticket %d - waiting until"
                     " futex[%ld] != %d\n", VG_(getpid)(),
                     VG_(gettid)(), ticket, (long)(futex - p->futex),
                     futex_value);
      /* The kernel blocks only if *futex still equals futex_value, so a
         release that has already bumped the futex word makes this call
         fail with VKI_EAGAIN instead of losing the wakeup. */
      sres = VG_(do_syscall3)(__NR_futex, (UWord)futex,
                              VKI_FUTEX_WAIT | VKI_FUTEX_PRIVATE_FLAG,
                              futex_value);
      if (sr_isError(sres) && sres._val != VKI_EAGAIN) {
         VG_(printf)("futex_wait() returned error code %ld\n", sres._val);
         vg_assert(False);
      }
   }
   __sync_synchronize();
   INNER_REQUEST(ANNOTATE_RWLOCK_ACQUIRED(p, /*is_w*/1));
   vg_assert(p->owner == 0);
   p->owner = VG_(gettid)();
}

/*
 * Release a ticket lock by incrementing the head of the queue. Only generate
 * a thread wakeup signal if at least one thread is waiting. If the queue tail
 * matches the wakeup_ticket value, no thread has to be woken up.
 *
 * Note: the tail is only read after the head has been incremented, since both
 * are declared volatile and since the __sync_...() functions imply a memory
 * barrier.
 */
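/*
 * Note (comment added for clarity): several tickets map to the same futex
 * word (ticket & TL_FUTEX_MASK), so the FUTEX_WAKE below wakes up to
 * 0x7fffffff waiters on that word. Waiters whose ticket does not match the
 * new head re-check the condition in acquire_sched_lock() and go back to
 * sleep.
 */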
static void release_sched_lock(struct sched_lock *p)
{
   unsigned wakeup_ticket, futex_value;
   volatile unsigned *futex;
   SysRes sres;

   vg_assert(p->owner != 0);
   p->owner = 0;
   INNER_REQUEST(ANNOTATE_RWLOCK_RELEASED(p, /*is_w*/1));
   wakeup_ticket = __sync_fetch_and_add(&p->head, 1) + 1;
   if (p->tail != wakeup_ticket) {
      futex = &p->futex[wakeup_ticket & TL_FUTEX_MASK];
      futex_value = __sync_fetch_and_add(futex, 1);
      if (s_debug)
         VG_(printf)("[%d/%d] release: waking up ticket %d (futex[%ld] = %d)"
                     "\n", VG_(getpid)(), VG_(gettid)(), wakeup_ticket,
                     (long)(futex - p->futex), futex_value);
      sres = VG_(do_syscall3)(__NR_futex, (UWord)futex,
                              VKI_FUTEX_WAKE | VKI_FUTEX_PRIVATE_FLAG,
                              0x7fffffff);
      vg_assert(!sr_isError(sres));
   } else {
      if (s_debug)
         VG_(printf)("[%d/%d] release: no thread is waiting for ticket %d\n",
                     VG_(getpid)(), VG_(gettid)(), wakeup_ticket);
   }
}

const struct sched_lock_ops ML_(linux_ticket_lock_ops) = {
   .get_sched_lock_name  = get_sched_lock_name,
   .create_sched_lock    = create_sched_lock,
   .destroy_sched_lock   = destroy_sched_lock,
   .get_sched_lock_owner = get_sched_lock_owner,
   .acquire_sched_lock   = acquire_sched_lock,
   .release_sched_lock   = release_sched_lock,
};
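
/*
 * Minimal usage sketch (illustrative only, not necessarily how Valgrind's
 * scheduler drives this table; it simply exercises the function pointers
 * defined above):
 *
 *    const struct sched_lock_ops *ops = &ML_(linux_ticket_lock_ops);
 *    struct sched_lock *sl = ops->create_sched_lock();
 *    ops->acquire_sched_lock(sl);
 *    // critical section; ops->get_sched_lock_owner(sl) == VG_(gettid)()
 *    ops->release_sched_lock(sl);
 *    ops->destroy_sched_lock(sl);
 */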