1663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 2663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--------------------------------------------------------------------*/ 3663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- Linux ticket lock implementation ticket-lock-linux.c ---*/ 4663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- ---*/ 5663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- Guarantees fair scheduling even if multiple threads are ---*/ 6663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- runnable at the same time on a multicore system. Has been ---*/ 7663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- observed to cause a slow-down compared to the generic ---*/ 8663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- scheduler lock with CPU frequency scaling enabled. Makes ---*/ 9663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- Valgrind slightly faster if CPU frequency scaling has been ---*/ 10663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--- disabled. See also http://bugs.kde.org/show_bug.cgi?id=270006---*/ 11663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/*--------------------------------------------------------------------*/ 12663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 13663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/* 14663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng This file is part of Valgrind, a dynamic binary instrumentation 15663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng framework. 16663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 17436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Copyright (C) 2011-2013 Bart Van Assche <bvanassche@acm.org>. 18663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 19663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng This program is free software; you can redistribute it and/or 20663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng modify it under the terms of the GNU General Public License as 21663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng published by the Free Software Foundation; either version 2 of the 22663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng License, or (at your option) any later version. 23663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 24663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng This program is distributed in the hope that it will be useful, but 25663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng WITHOUT ANY WARRANTY; without even the implied warranty of 26663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 27663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng General Public License for more details. 28663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 29663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng You should have received a copy of the GNU General Public License 30663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng along with this program; if not, write to the Free Software 31663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 32663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 02111-1307, USA. 33663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 34663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng The GNU General Public License is contained in the file COPYING. 35663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng*/ 36663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 37663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_basics.h" 38663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_libcassert.h" 39663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_libcbase.h" // VG_(memset)() 40663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_libcprint.h" 41663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_syscall.h" 42663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_vki.h" 43663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "pub_core_vkiscnums.h" // __NR_futex 44436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include "pub_core_libcproc.h" 45436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include "pub_core_mallocfree.h" 46436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include "pub_core_threadstate.h" 47436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include "pub_core_inner.h" 48663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#if defined(ENABLE_INNER_CLIENT_REQUEST) 49663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "helgrind/helgrind.h" 50663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#endif 51663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "priv_sched-lock.h" 52663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#include "priv_sched-lock-impl.h" 53663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 54663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#define TL_FUTEX_COUNT_LOG2 4 55663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#define TL_FUTEX_COUNT (1U << TL_FUTEX_COUNT_LOG2) 56663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#define TL_FUTEX_MASK (TL_FUTEX_COUNT - 1) 57663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 58663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstruct sched_lock { 59663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng volatile unsigned head; 60663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng volatile unsigned tail; 61663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng volatile unsigned futex[TL_FUTEX_COUNT]; 62663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng int owner; 63663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng}; 64663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 65663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#if 1 66663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic Bool s_debug; 67663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#else 68663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic Bool s_debug = True; 69663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#endif 70663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 71436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovstatic const HChar *get_sched_lock_name(void) 72663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{ 73663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng return "ticket lock"; 74663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng} 75663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 76663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic struct sched_lock *create_sched_lock(void) 77663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{ 78663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng struct sched_lock *p; 79663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 80663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng p = VG_(malloc)("sched_lock", sizeof(*p)); 81663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (p) { 82663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // The futex syscall requires that a futex takes four bytes. 83663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng vg_assert(sizeof(p->futex[0]) == 4); 84663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 85663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng p->head = 0; 86663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng p->tail = 0; 87663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng VG_(memset)((void*)p->futex, 0, sizeof(p->futex)); 88663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng p->owner = 0; 89663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 90663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng INNER_REQUEST(ANNOTATE_RWLOCK_CREATE(p)); 91663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng INNER_REQUEST(ANNOTATE_BENIGN_RACE_SIZED(&p->futex, sizeof(p->futex), "")); 92663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng return p; 93663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng} 94663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 95663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic void destroy_sched_lock(struct sched_lock *p) 96663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{ 97663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng INNER_REQUEST(ANNOTATE_RWLOCK_DESTROY(p)); 98663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng VG_(free)(p); 99663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng} 100663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 101663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic int get_sched_lock_owner(struct sched_lock *p) 102663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{ 103663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng return p->owner; 104663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng} 105663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 106663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/* 107663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * Acquire ticket lock. Increment the tail of the queue and use the original 108663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * value as the ticket value. Wait until the head of the queue equals the 109663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * ticket value. The futex used to wait depends on the ticket value in order 110663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * to avoid that all threads get woken up every time a ticket lock is 111663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * released. That last effect is sometimes called the "thundering herd" 112663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * effect. 113663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * 114663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * See also Nick Piggin, x86: FIFO ticket spinlocks, Linux kernel mailing list 115663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * (http://lkml.org/lkml/2007/11/1/125) for more info. 116663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng */ 117663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic void acquire_sched_lock(struct sched_lock *p) 118663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{ 119663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng unsigned ticket, futex_value; 120663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng volatile unsigned *futex; 121663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng SysRes sres; 122663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 123663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng ticket = __sync_fetch_and_add(&p->tail, 1); 124663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng futex = &p->futex[ticket & TL_FUTEX_MASK]; 125663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (s_debug) 126663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng VG_(printf)("[%d/%d] acquire: ticket %d\n", VG_(getpid)(), 127663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng VG_(gettid)(), ticket); 128663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng for (;;) { 129663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng futex_value = *futex; 130663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng __sync_synchronize(); 131663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (ticket == p->head) 132663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng break; 133663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (s_debug) 134663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng VG_(printf)("[%d/%d] acquire: ticket %d - waiting until" 135663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng " futex[%ld] != %d\n", VG_(getpid)(), 136663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng VG_(gettid)(), ticket, (long)(futex - p->futex), 137663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng futex_value); 138663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng sres = VG_(do_syscall3)(__NR_futex, (UWord)futex, 139663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng VKI_FUTEX_WAIT | VKI_FUTEX_PRIVATE_FLAG, 140663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng futex_value); 141663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (sr_isError(sres) && sres._val != VKI_EAGAIN) { 142663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng VG_(printf)("futex_wait() returned error code %ld\n", sres._val); 143663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng vg_assert(False); 144663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 145663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 146663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng __sync_synchronize(); 147663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng INNER_REQUEST(ANNOTATE_RWLOCK_ACQUIRED(p, /*is_w*/1)); 148663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng vg_assert(p->owner == 0); 149663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng p->owner = VG_(gettid)(); 150663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng} 151663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 152663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/* 153663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * Release a ticket lock by incrementing the head of the queue. Only generate 154663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * a thread wakeup signal if at least one thread is waiting. If the queue tail 155663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * matches the wakeup_ticket value, no threads have to be woken up. 156663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * 157663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * Note: tail will only be read after head has been incremented since both are 158663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * declared as volatile and since the __sync...() functions imply a memory 159663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng * barrier. 160663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng */ 161663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic void release_sched_lock(struct sched_lock *p) 162663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{ 163663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng unsigned wakeup_ticket, futex_value; 164663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng volatile unsigned *futex; 165663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng SysRes sres; 166663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 167663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng vg_assert(p->owner != 0); 168663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng p->owner = 0; 169663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng INNER_REQUEST(ANNOTATE_RWLOCK_RELEASED(p, /*is_w*/1)); 170663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng wakeup_ticket = __sync_fetch_and_add(&p->head, 1) + 1; 171663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (p->tail != wakeup_ticket) { 172663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng futex = &p->futex[wakeup_ticket & TL_FUTEX_MASK]; 173663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng futex_value = __sync_fetch_and_add(futex, 1); 174663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (s_debug) 175663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng VG_(printf)("[%d/%d] release: waking up ticket %d (futex[%ld] = %d)" 176663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng "\n", VG_(getpid)(), VG_(gettid)(), wakeup_ticket, 177663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng (long)(futex - p->futex), futex_value); 178663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng sres = VG_(do_syscall3)(__NR_futex, (UWord)futex, 179663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng VKI_FUTEX_WAKE | VKI_FUTEX_PRIVATE_FLAG, 180663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 0x7fffffff); 181663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng vg_assert(!sr_isError(sres)); 182663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } else { 183663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (s_debug) 184663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng VG_(printf)("[%d/%d] release: no thread is waiting for ticket %d\n", 185663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng VG_(getpid)(), VG_(gettid)(), wakeup_ticket); 186663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 187663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng} 188663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 189663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengconst struct sched_lock_ops ML_(linux_ticket_lock_ops) = { 190663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng .get_sched_lock_name = get_sched_lock_name, 191663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng .create_sched_lock = create_sched_lock, 192663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng .destroy_sched_lock = destroy_sched_lock, 193663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng .get_sched_lock_owner = get_sched_lock_owner, 194663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng .acquire_sched_lock = acquire_sched_lock, 195663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng .release_sched_lock = release_sched_lock, 196663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng}; 197