178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--------------------------------------------------------------------*/ 378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- Linux ticket lock implementation ticket-lock-linux.c ---*/ 478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- ---*/ 578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- Guarantees fair scheduling even if multiple threads are ---*/ 678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- runnable at the same time on a multicore system. Has been ---*/ 778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- observed to cause a slow-down compared to the generic ---*/ 878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- scheduler lock with CPU frequency scaling enabled. Makes ---*/ 978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- Valgrind slightly faster if CPU frequency scaling has been ---*/ 1078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--- disabled. See also http://bugs.kde.org/show_bug.cgi?id=270006---*/ 1178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/*--------------------------------------------------------------------*/ 1278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 1378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/* 1478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart This file is part of Valgrind, a dynamic binary instrumentation 1578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart framework. 1678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 17ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes Copyright (C) 2011-2017 Bart Van Assche <bvanassche@acm.org>. 1878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 1978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart This program is free software; you can redistribute it and/or 2078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart modify it under the terms of the GNU General Public License as 2178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart published by the Free Software Foundation; either version 2 of the 2278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart License, or (at your option) any later version. 2378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 2478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart This program is distributed in the hope that it will be useful, but 2578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart WITHOUT ANY WARRANTY; without even the implied warranty of 2678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 2778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart General Public License for more details. 2878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 2978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart You should have received a copy of the GNU General Public License 3078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart along with this program; if not, write to the Free Software 3178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 3278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 02111-1307, USA. 3378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 3478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart The GNU General Public License is contained in the file COPYING. 3578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart*/ 3678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 3778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_basics.h" 3878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_libcassert.h" 3978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_libcbase.h" // VG_(memset)() 4078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_libcprint.h" 4178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_syscall.h" 4278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_vki.h" 4378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "pub_core_vkiscnums.h" // __NR_futex 44c91f58449e6fc2a4ce0851639a342c4277612fbbflorian#include "pub_core_libcproc.h" 45c91f58449e6fc2a4ce0851639a342c4277612fbbflorian#include "pub_core_mallocfree.h" 46c91f58449e6fc2a4ce0851639a342c4277612fbbflorian#include "pub_core_threadstate.h" 47c91f58449e6fc2a4ce0851639a342c4277612fbbflorian#include "pub_core_inner.h" 48277eafff8cbea7292851b4bf4d5b6c18385578a0philippe#if defined(ENABLE_INNER_CLIENT_REQUEST) 49277eafff8cbea7292851b4bf4d5b6c18385578a0philippe#include "helgrind/helgrind.h" 50277eafff8cbea7292851b4bf4d5b6c18385578a0philippe#endif 5178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "priv_sched-lock.h" 5278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#include "priv_sched-lock-impl.h" 5378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 5478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#define TL_FUTEX_COUNT_LOG2 4 5578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#define TL_FUTEX_COUNT (1U << TL_FUTEX_COUNT_LOG2) 5678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#define TL_FUTEX_MASK (TL_FUTEX_COUNT - 1) 5778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 5878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstruct sched_lock { 5978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart volatile unsigned head; 6078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart volatile unsigned tail; 6178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart volatile unsigned futex[TL_FUTEX_COUNT]; 6278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart int owner; 6378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart}; 6478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 6578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#if 1 6678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic Bool s_debug; 6778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#else 6878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic Bool s_debug = True; 6978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart#endif 7078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 71cd19e99a2fc1adf5a142664d9604d18b51b646ecflorianstatic const HChar *get_sched_lock_name(void) 7278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart{ 7378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart return "ticket lock"; 7478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart} 7578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 7678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic struct sched_lock *create_sched_lock(void) 7778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart{ 7878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart struct sched_lock *p; 7978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 8078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart p = VG_(malloc)("sched_lock", sizeof(*p)); 8168790a73bcb290746a5b34c44538c3b2728eaaecflorian 8268790a73bcb290746a5b34c44538c3b2728eaaecflorian // The futex syscall requires that a futex takes four bytes. 8368790a73bcb290746a5b34c44538c3b2728eaaecflorian vg_assert(sizeof(p->futex[0]) == 4); 8468790a73bcb290746a5b34c44538c3b2728eaaecflorian 852857285fdec67e0b464104d7f35bf3cd26a10258florian VG_(memset)(p, 0, sizeof(*p)); 8668790a73bcb290746a5b34c44538c3b2728eaaecflorian 87277eafff8cbea7292851b4bf4d5b6c18385578a0philippe INNER_REQUEST(ANNOTATE_RWLOCK_CREATE(p)); 883f6e9d99048d516266b6d8928a159785771eb054bart INNER_REQUEST(ANNOTATE_BENIGN_RACE_SIZED(&p->futex, sizeof(p->futex), "")); 8978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart return p; 9078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart} 9178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 9278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic void destroy_sched_lock(struct sched_lock *p) 9378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart{ 94277eafff8cbea7292851b4bf4d5b6c18385578a0philippe INNER_REQUEST(ANNOTATE_RWLOCK_DESTROY(p)); 9578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart VG_(free)(p); 9678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart} 9778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 9878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic int get_sched_lock_owner(struct sched_lock *p) 9978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart{ 10078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart return p->owner; 10178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart} 10278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 10378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/* 10478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * Acquire ticket lock. Increment the tail of the queue and use the original 10578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * value as the ticket value. Wait until the head of the queue equals the 10678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * ticket value. The futex used to wait depends on the ticket value in order 10778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * to avoid that all threads get woken up every time a ticket lock is 10878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * released. That last effect is sometimes called the "thundering herd" 10978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * effect. 11078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * 11178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * See also Nick Piggin, x86: FIFO ticket spinlocks, Linux kernel mailing list 11278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * (http://lkml.org/lkml/2007/11/1/125) for more info. 11378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart */ 11478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic void acquire_sched_lock(struct sched_lock *p) 11578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart{ 11678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart unsigned ticket, futex_value; 11778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart volatile unsigned *futex; 11878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart SysRes sres; 11978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 12078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart ticket = __sync_fetch_and_add(&p->tail, 1); 12178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart futex = &p->futex[ticket & TL_FUTEX_MASK]; 12278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart if (s_debug) 123c6e5d76e9eea8625f385ff844545c688c91938daflorian VG_(printf)("[%d/%d] acquire: ticket %u\n", VG_(getpid)(), 12478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart VG_(gettid)(), ticket); 12578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart for (;;) { 12678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart futex_value = *futex; 1270c30a7b6e2ad70dff1924d851ba94f73d309b11abart __sync_synchronize(); 12878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart if (ticket == p->head) 12978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart break; 13078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart if (s_debug) 131c6e5d76e9eea8625f385ff844545c688c91938daflorian VG_(printf)("[%d/%d] acquire: ticket %u - waiting until" 132c6e5d76e9eea8625f385ff844545c688c91938daflorian " futex[%ld] != %u\n", VG_(getpid)(), 13378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart VG_(gettid)(), ticket, (long)(futex - p->futex), 13478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart futex_value); 13578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart sres = VG_(do_syscall3)(__NR_futex, (UWord)futex, 13678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart VKI_FUTEX_WAIT | VKI_FUTEX_PRIVATE_FLAG, 13778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart futex_value); 138bd664fdd6b51670786bf058ae23e3f79a9758487sewardj if (sr_isError(sres) && sr_Err(sres) != VKI_EAGAIN) { 139c6e5d76e9eea8625f385ff844545c688c91938daflorian VG_(printf)("futex_wait() returned error code %lu\n", sr_Err(sres)); 14078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart vg_assert(False); 14178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart } 14278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart } 1430c30a7b6e2ad70dff1924d851ba94f73d309b11abart __sync_synchronize(); 144277eafff8cbea7292851b4bf4d5b6c18385578a0philippe INNER_REQUEST(ANNOTATE_RWLOCK_ACQUIRED(p, /*is_w*/1)); 14578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart vg_assert(p->owner == 0); 14678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart p->owner = VG_(gettid)(); 14778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart} 14878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 14978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart/* 15078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * Release a ticket lock by incrementing the head of the queue. Only generate 15178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * a thread wakeup signal if at least one thread is waiting. If the queue tail 15278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * matches the wakeup_ticket value, no threads have to be woken up. 15378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * 15478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * Note: tail will only be read after head has been incremented since both are 15578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * declared as volatile and since the __sync...() functions imply a memory 15678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart * barrier. 15778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart */ 15878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartstatic void release_sched_lock(struct sched_lock *p) 15978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart{ 16078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart unsigned wakeup_ticket, futex_value; 16178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart volatile unsigned *futex; 16278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart SysRes sres; 16378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 16478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart vg_assert(p->owner != 0); 16578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart p->owner = 0; 166277eafff8cbea7292851b4bf4d5b6c18385578a0philippe INNER_REQUEST(ANNOTATE_RWLOCK_RELEASED(p, /*is_w*/1)); 16778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart wakeup_ticket = __sync_fetch_and_add(&p->head, 1) + 1; 16878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart if (p->tail != wakeup_ticket) { 16978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart futex = &p->futex[wakeup_ticket & TL_FUTEX_MASK]; 17078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart futex_value = __sync_fetch_and_add(futex, 1); 17178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart if (s_debug) 172c6e5d76e9eea8625f385ff844545c688c91938daflorian VG_(printf)("[%d/%d] release: waking up ticket %u (futex[%ld] = %u)" 17378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart "\n", VG_(getpid)(), VG_(gettid)(), wakeup_ticket, 17478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart (long)(futex - p->futex), futex_value); 17578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart sres = VG_(do_syscall3)(__NR_futex, (UWord)futex, 17678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart VKI_FUTEX_WAKE | VKI_FUTEX_PRIVATE_FLAG, 17778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 0x7fffffff); 17878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart vg_assert(!sr_isError(sres)); 17978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart } else { 18078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart if (s_debug) 181c6e5d76e9eea8625f385ff844545c688c91938daflorian VG_(printf)("[%d/%d] release: no thread is waiting for ticket %u\n", 18278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart VG_(getpid)(), VG_(gettid)(), wakeup_ticket); 18378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart } 18478bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart} 18578bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart 18678bfc711d3e684c76eeab5f89a94a78d40ed6f4bbartconst struct sched_lock_ops ML_(linux_ticket_lock_ops) = { 18778bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart .get_sched_lock_name = get_sched_lock_name, 18878bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart .create_sched_lock = create_sched_lock, 18978bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart .destroy_sched_lock = destroy_sched_lock, 19078bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart .get_sched_lock_owner = get_sched_lock_owner, 19178bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart .acquire_sched_lock = acquire_sched_lock, 19278bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart .release_sched_lock = release_sched_lock, 19378bfc711d3e684c76eeab5f89a94a78d40ed6f4bbart}; 194