/* pthread.c revision 06823da2f0c8b4a4ce4c45113032f03df85c94b8 */
/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/types.h>
#include <unistd.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/atomics.h>
#include <bionic_tls.h>
#include <sys/mman.h>
#include <pthread.h>
#include <time.h>
#include "pthread_internal.h"
#include "thread_private.h"
#include <limits.h>
#include <memory.h>
#include <assert.h>
#include <malloc.h>
#include <bionic_futex.h>
#include <bionic_atomic_inline.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <bionic_pthread.h>

extern void pthread_debug_mutex_lock_check(pthread_mutex_t *mutex);
extern void pthread_debug_mutex_unlock_check(pthread_mutex_t *mutex);

extern int  __pthread_clone(int (*fn)(void*), void *child_stack, int flags, void *arg);
extern void _exit_with_stack_teardown(void * stackBase, int stackSize, int retCode);
extern void _exit_thread(int  retCode);
extern int  __set_errno(int);

int  __futex_wake_ex(volatile void *ftx, int pshared, int val)
{
    return __futex_syscall3(ftx, pshared ? FUTEX_WAKE : FUTEX_WAKE_PRIVATE, val);
}

int  __futex_wait_ex(volatile void *ftx, int pshared, int val, const struct timespec *timeout)
{
    return __futex_syscall4(ftx, pshared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE, val, timeout);
}

#define  __likely(cond)    __builtin_expect(!!(cond), 1)
#define  __unlikely(cond)  __builtin_expect(!!(cond), 0)

#ifdef __i386__
#define ATTRIBUTES __attribute__((noinline)) __attribute__((fastcall))
#else
#define ATTRIBUTES __attribute__((noinline))
#endif

void ATTRIBUTES _thread_created_hook(pid_t thread_id);

#define PTHREAD_ATTR_FLAG_DETACHED      0x00000001
#define PTHREAD_ATTR_FLAG_USER_STACK    0x00000002

#define DEFAULT_STACKSIZE (1024 * 1024)

static pthread_mutex_t mmap_lock = PTHREAD_MUTEX_INITIALIZER;


static const pthread_attr_t gDefaultPthreadAttr = {
    .flags = 0,
    .stack_base = NULL,
    .stack_size = DEFAULT_STACKSIZE,
    .guard_size = PAGE_SIZE,
    .sched_policy = SCHED_NORMAL,
    .sched_priority = 0
};

#define  INIT_THREADS  1

static pthread_internal_t*  gThreadList = NULL;
static pthread_mutex_t gThreadListLock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t gDebuggerNotificationLock = PTHREAD_MUTEX_INITIALIZER;


/* we simply malloc/free the internal pthread_internal_t structures. we may
 * want to use a different allocation scheme in the future, but this one is
 * good enough for now.
 */
static pthread_internal_t*
_pthread_internal_alloc(void)
{
    pthread_internal_t*   thread;

    thread = calloc( sizeof(*thread), 1 );
    if (thread)
        thread->intern = 1;

    return thread;
}

static void
_pthread_internal_free( pthread_internal_t*  thread )
{
    if (thread && thread->intern) {
        thread->intern = 0;  /* just in case */
        free (thread);
    }
}


static void
_pthread_internal_remove_locked( pthread_internal_t*  thread )
{
    thread->next->pref = thread->pref;
    thread->pref[0]    = thread->next;
}

static void
_pthread_internal_remove( pthread_internal_t*  thread )
{
    pthread_mutex_lock(&gThreadListLock);
    _pthread_internal_remove_locked(thread);
    pthread_mutex_unlock(&gThreadListLock);
}

static void
_pthread_internal_add( pthread_internal_t*  thread )
{
    pthread_mutex_lock(&gThreadListLock);
    thread->pref = &gThreadList;
    thread->next = thread->pref[0];
    if (thread->next)
        thread->next->pref = &thread->next;
    thread->pref[0] = thread;
    pthread_mutex_unlock(&gThreadListLock);
}

pthread_internal_t*
__get_thread(void)
{
    void**  tls = (void**)__get_tls();

    return  (pthread_internal_t*) tls[TLS_SLOT_THREAD_ID];
}


void*
__get_stack_base(int  *p_stack_size)
{
    pthread_internal_t*  thread = __get_thread();

    *p_stack_size = thread->attr.stack_size;
    return thread->attr.stack_base;
}


void  __init_tls(void**  tls, void*  thread)
{
    int  nn;

    ((pthread_internal_t*)thread)->tls = tls;

    // slot 0 must point to the TLS area itself; this is required by the
    // x86 Linux kernel's thread-local-storage implementation
    tls[TLS_SLOT_SELF]      = (void*)tls;
    tls[TLS_SLOT_THREAD_ID] = thread;
    for (nn = TLS_SLOT_ERRNO; nn < BIONIC_TLS_SLOTS; nn++)
        tls[nn] = 0;

    __set_tls( (void*)tls );
}


/*
 * This trampoline is called from the assembly clone() function
 */
void __thread_entry(int (*func)(void*), void *arg, void **tls)
{
    int retValue;
    pthread_internal_t * thrInfo;

    // Wait for our creating thread to release us. This lets it have time to
    // notify gdb about this thread before it starts doing anything.
    //
    // This also provides the memory barrier needed to ensure that all memory
    // accesses previously made by the creating thread are visible to us.
    pthread_mutex_t * start_mutex = (pthread_mutex_t *)&tls[TLS_SLOT_SELF];
    pthread_mutex_lock(start_mutex);
    pthread_mutex_destroy(start_mutex);

    thrInfo = (pthread_internal_t *) tls[TLS_SLOT_THREAD_ID];

    __init_tls( tls, thrInfo );

    pthread_exit( (void*)func(arg) );
}

void _init_thread(pthread_internal_t * thread, pid_t kernel_id, pthread_attr_t * attr, void * stack_base)
{
    if (attr == NULL) {
        thread->attr = gDefaultPthreadAttr;
    } else {
        thread->attr = *attr;
    }
    thread->attr.stack_base = stack_base;
    thread->kernel_id       = kernel_id;

    // set the scheduling policy/priority of the thread
    if (thread->attr.sched_policy != SCHED_NORMAL) {
        struct sched_param param;
        param.sched_priority = thread->attr.sched_priority;
        sched_setscheduler(kernel_id, thread->attr.sched_policy, &param);
    }

    pthread_cond_init(&thread->join_cond, NULL);
    thread->join_count = 0;

    thread->cleanup_stack = NULL;

    _pthread_internal_add(thread);
}


/* XXX stacks not reclaimed if thread spawn fails */
/* XXX stacks address spaces should be reused if available again */

static void *mkstack(size_t size, size_t guard_size)
{
    void * stack;

    pthread_mutex_lock(&mmap_lock);

    stack = mmap(NULL, size,
                 PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE,
                 -1, 0);

    if(stack == MAP_FAILED) {
        stack = NULL;
        goto done;
    }

    if(mprotect(stack, guard_size, PROT_NONE)){
        munmap(stack, size);
        stack = NULL;
        goto done;
    }

done:
    pthread_mutex_unlock(&mmap_lock);
    return stack;
}

/*
 * Create a new thread. The thread's stack is laid out like so:
 *
 * +---------------------------+
 * |     pthread_internal_t    |
 * +---------------------------+
 * |                           |
 * |          TLS area         |
 * |                           |
 * +---------------------------+
 * |                           |
 * .                           .
 * .         stack area        .
 * .                           .
 * |                           |
 * +---------------------------+
 * |         guard page        |
 * +---------------------------+
 *
 *  note that TLS[0] must be a pointer to itself, this is required
 *  by the thread-local storage implementation of the x86 Linux
 *  kernel, where the TLS pointer is read by reading fs:[0]
 */
int pthread_create(pthread_t *thread_out, pthread_attr_t const * attr,
                   void *(*start_routine)(void *), void * arg)
{
    char*   stack;
    void**  tls;
    int tid;
    pthread_mutex_t * start_mutex;
    pthread_internal_t * thread;
    int                  madestack = 0;
    int     old_errno = errno;

    /* this will inform the rest of the C library that at least one thread
     * was created. this will force certain functions to acquire/release
     * locks (e.g. atexit()) to protect shared global structures.
     *
     * this works because pthread_create() is not called by the C library
     * initialization routine that sets up the main thread's data structures.
     */
    __isthreaded = 1;

    thread = _pthread_internal_alloc();
    if (thread == NULL)
        return ENOMEM;

    if (attr == NULL) {
        attr = &gDefaultPthreadAttr;
    }

    // make sure the stack is PAGE_SIZE aligned
    size_t stackSize = (attr->stack_size +
                        (PAGE_SIZE-1)) & ~(PAGE_SIZE-1);

    if (!attr->stack_base) {
        stack = mkstack(stackSize, attr->guard_size);
        if(stack == NULL) {
            _pthread_internal_free(thread);
            return ENOMEM;
        }
        madestack = 1;
    } else {
        stack = attr->stack_base;
    }

    // Make room for TLS
    tls = (void**)(stack + stackSize - BIONIC_TLS_SLOTS*sizeof(void*));

    // Create a mutex for the thread in TLS_SLOT_SELF to wait on once it starts so we can keep
    // it from doing anything until after we notify the debugger about it
    //
    // This also provides the memory barrier we need to ensure that all
    // memory accesses previously performed by this thread are visible to
    // the new thread.
    start_mutex = (pthread_mutex_t *) &tls[TLS_SLOT_SELF];
    pthread_mutex_init(start_mutex, NULL);
    pthread_mutex_lock(start_mutex);

    tls[TLS_SLOT_THREAD_ID] = thread;

    tid = __pthread_clone((int(*)(void*))start_routine, tls,
                CLONE_FILES | CLONE_FS | CLONE_VM | CLONE_SIGHAND
                | CLONE_THREAD | CLONE_SYSVSEM | CLONE_DETACHED,
                arg);

    if(tid < 0) {
        int  result;
        if (madestack)
            munmap(stack, stackSize);
        _pthread_internal_free(thread);
        result = errno;
        errno = old_errno;
        return result;
    }

    _init_thread(thread, tid, (pthread_attr_t*)attr, stack);

    if (!madestack)
        thread->attr.flags |= PTHREAD_ATTR_FLAG_USER_STACK;

    // Notify any debuggers about the new thread
    pthread_mutex_lock(&gDebuggerNotificationLock);
    _thread_created_hook(tid);
    pthread_mutex_unlock(&gDebuggerNotificationLock);

    // Let the thread do its thing
    pthread_mutex_unlock(start_mutex);

    *thread_out = (pthread_t)thread;
    return 0;
}
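
/* Example: a minimal sketch of client code for the API above, showing a
 * joinable thread created with default attributes and reaped with
 * pthread_join(). The names main/worker and the values are illustrative only.
 *
 *   #include <pthread.h>
 *   #include <stdint.h>
 *   #include <stdio.h>
 *
 *   static void* worker(void* arg) {
 *       return (void*)((uintptr_t)arg + 1);     // result is returned via the start routine
 *   }
 *
 *   int main(void) {
 *       pthread_t t;
 *       void* result;
 *       if (pthread_create(&t, NULL, worker, (void*)41) != 0)
 *           return 1;                            // error code is returned, not stored in errno
 *       pthread_join(t, &result);                // last joiner frees the internal descriptor
 *       printf("worker returned %lu\n", (unsigned long)(uintptr_t)result);
 *       return 0;
 *   }
 */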


int pthread_attr_init(pthread_attr_t * attr)
{
    *attr = gDefaultPthreadAttr;
    return 0;
}

int pthread_attr_destroy(pthread_attr_t * attr)
{
    memset(attr, 0x42, sizeof(pthread_attr_t));
    return 0;
}

int pthread_attr_setdetachstate(pthread_attr_t * attr, int state)
{
    if (state == PTHREAD_CREATE_DETACHED) {
        attr->flags |= PTHREAD_ATTR_FLAG_DETACHED;
    } else if (state == PTHREAD_CREATE_JOINABLE) {
        attr->flags &= ~PTHREAD_ATTR_FLAG_DETACHED;
    } else {
        return EINVAL;
    }
    return 0;
}

int pthread_attr_getdetachstate(pthread_attr_t const * attr, int * state)
{
    *state = (attr->flags & PTHREAD_ATTR_FLAG_DETACHED)
           ? PTHREAD_CREATE_DETACHED
           : PTHREAD_CREATE_JOINABLE;
    return 0;
}
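
/* Example: a hypothetical caller configuring a detached thread with a larger
 * stack through the attribute API; 'tid' and 'worker' are caller-side names,
 * not identifiers from this file.
 *
 *   pthread_attr_t attr;
 *   pthread_attr_init(&attr);
 *   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
 *   pthread_attr_setstacksize(&attr, 2 * 1024 * 1024);   // must be page-aligned and >= PTHREAD_STACK_MIN
 *   pthread_create(&tid, &attr, worker, NULL);           // detached: no pthread_join() afterwards
 *   pthread_attr_destroy(&attr);
 */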

int pthread_attr_setschedpolicy(pthread_attr_t * attr, int policy)
{
    attr->sched_policy = policy;
    return 0;
}

int pthread_attr_getschedpolicy(pthread_attr_t const * attr, int * policy)
{
    *policy = attr->sched_policy;
    return 0;
}

int pthread_attr_setschedparam(pthread_attr_t * attr, struct sched_param const * param)
{
    attr->sched_priority = param->sched_priority;
    return 0;
}

int pthread_attr_getschedparam(pthread_attr_t const * attr, struct sched_param * param)
{
    param->sched_priority = attr->sched_priority;
    return 0;
}

int pthread_attr_setstacksize(pthread_attr_t * attr, size_t stack_size)
{
    if ((stack_size & (PAGE_SIZE - 1) || stack_size < PTHREAD_STACK_MIN)) {
        return EINVAL;
    }
    attr->stack_size = stack_size;
    return 0;
}

int pthread_attr_getstacksize(pthread_attr_t const * attr, size_t * stack_size)
{
    *stack_size = attr->stack_size;
    return 0;
}

int pthread_attr_setstackaddr(pthread_attr_t * attr, void * stack_addr)
{
#if 1
    // It's not clear if this is setting the top or bottom of the stack, so don't handle it for now.
    return ENOSYS;
#else
    if ((uint32_t)stack_addr & (PAGE_SIZE - 1)) {
        return EINVAL;
    }
    attr->stack_base = stack_addr;
    return 0;
#endif
}

int pthread_attr_getstackaddr(pthread_attr_t const * attr, void ** stack_addr)
{
    *stack_addr = (char*)attr->stack_base + attr->stack_size;
    return 0;
}

int pthread_attr_setstack(pthread_attr_t * attr, void * stack_base, size_t stack_size)
{
    if ((stack_size & (PAGE_SIZE - 1) || stack_size < PTHREAD_STACK_MIN)) {
        return EINVAL;
    }
    if ((uint32_t)stack_base & (PAGE_SIZE - 1)) {
        return EINVAL;
    }
    attr->stack_base = stack_base;
    attr->stack_size = stack_size;
    return 0;
}

int pthread_attr_getstack(pthread_attr_t const * attr, void ** stack_base, size_t * stack_size)
{
    *stack_base = attr->stack_base;
    *stack_size = attr->stack_size;
    return 0;
}

int pthread_attr_setguardsize(pthread_attr_t * attr, size_t guard_size)
{
    if (guard_size & (PAGE_SIZE - 1) || guard_size < PAGE_SIZE) {
        return EINVAL;
    }

    attr->guard_size = guard_size;
    return 0;
}

int pthread_attr_getguardsize(pthread_attr_t const * attr, size_t * guard_size)
{
    *guard_size = attr->guard_size;
    return 0;
}

int pthread_getattr_np(pthread_t thid, pthread_attr_t * attr)
{
    pthread_internal_t * thread = (pthread_internal_t *)thid;
    *attr = thread->attr;
    return 0;
}

int pthread_attr_setscope(pthread_attr_t *attr, int  scope)
{
    if (scope == PTHREAD_SCOPE_SYSTEM)
        return 0;
    if (scope == PTHREAD_SCOPE_PROCESS)
        return ENOTSUP;

    return EINVAL;
}

int pthread_attr_getscope(pthread_attr_t const *attr)
{
    return PTHREAD_SCOPE_SYSTEM;
}


/* CAVEAT: our implementation of pthread_cleanup_push/pop doesn't support C++ exceptions
 *         and thread cancelation
 */

void __pthread_cleanup_push( __pthread_cleanup_t*      c,
                             __pthread_cleanup_func_t  routine,
                             void*                     arg )
{
    pthread_internal_t*  thread = __get_thread();

    c->__cleanup_routine  = routine;
    c->__cleanup_arg      = arg;
    c->__cleanup_prev     = thread->cleanup_stack;
    thread->cleanup_stack = c;
}

void __pthread_cleanup_pop( __pthread_cleanup_t*  c, int  execute )
{
    pthread_internal_t*  thread = __get_thread();

    thread->cleanup_stack = c->__cleanup_prev;
    if (execute)
        c->__cleanup_routine(c->__cleanup_arg);
}
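
/* Example: how the pthread_cleanup_push()/pthread_cleanup_pop() macros from
 * <pthread.h> end up using the two helpers above. The unlock-on-exit idiom
 * below is a hypothetical caller (do_work() is illustrative), not code from
 * this file.
 *
 *   static void unlock_mutex(void* m) {
 *       pthread_mutex_unlock((pthread_mutex_t*)m);
 *   }
 *
 *   void locked_work(pthread_mutex_t* m) {
 *       pthread_mutex_lock(m);
 *       pthread_cleanup_push(unlock_mutex, m);   // registers a __pthread_cleanup_t on the stack
 *       do_work();                               // if this calls pthread_exit(), unlock_mutex runs
 *       pthread_cleanup_pop(1);                  // pops the handler and runs it (argument 1)
 *   }
 */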

/* used by pthread_exit() to clean all TLS keys of the current thread */
static void pthread_key_clean_all(void);

void pthread_exit(void * retval)
{
    pthread_internal_t*  thread     = __get_thread();
    void*                stack_base = thread->attr.stack_base;
    int                  stack_size = thread->attr.stack_size;
    int                  user_stack = (thread->attr.flags & PTHREAD_ATTR_FLAG_USER_STACK) != 0;
    sigset_t mask;

    // call the cleanup handlers first
    while (thread->cleanup_stack) {
        __pthread_cleanup_t*  c = thread->cleanup_stack;
        thread->cleanup_stack   = c->__cleanup_prev;
        c->__cleanup_routine(c->__cleanup_arg);
    }

    // call the TLS destructors, it is important to do that before removing this
    // thread from the global list. this will ensure that if someone else deletes
    // a TLS key, the corresponding value will be set to NULL in this thread's TLS
    // space (see pthread_key_delete)
    pthread_key_clean_all();

    // if the thread is detached, destroy the pthread_internal_t
    // otherwise, keep it in memory and signal any joiners
    if (thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) {
        _pthread_internal_remove(thread);
        _pthread_internal_free(thread);
    } else {
       /* the join_count field is used to store the number of threads waiting for
        * the termination of this thread with pthread_join().
        *
        * if it is positive we need to signal the waiters, and we do not touch
        * the count (it will be decremented by the waiters, the last one will
        * also remove/free the thread structure)
        *
        * if it is zero, we set the count value to -1 to indicate that the
        * thread is in 'zombie' state: it has stopped executing, and its stack
        * is gone (as well as its TLS area). when another thread calls pthread_join()
        * on it, it will immediately free the thread and return.
        */
        pthread_mutex_lock(&gThreadListLock);
        thread->return_value = retval;
        if (thread->join_count > 0) {
            pthread_cond_broadcast(&thread->join_cond);
        } else {
            thread->join_count = -1;  /* zombie thread */
        }
        pthread_mutex_unlock(&gThreadListLock);
    }

    sigfillset(&mask);
    sigdelset(&mask, SIGSEGV);
    (void)sigprocmask(SIG_SETMASK, &mask, (sigset_t *)NULL);

    // destroy the thread stack
    if (user_stack)
        _exit_thread((int)retval);
    else
        _exit_with_stack_teardown(stack_base, stack_size, (int)retval);
}

int pthread_join(pthread_t thid, void ** ret_val)
{
    pthread_internal_t*  thread = (pthread_internal_t*)thid;
    int                  count;

    // check that the thread still exists and is not detached
    pthread_mutex_lock(&gThreadListLock);

    for (thread = gThreadList; thread != NULL; thread = thread->next)
        if (thread == (pthread_internal_t*)thid)
            goto FoundIt;

    pthread_mutex_unlock(&gThreadListLock);
    return ESRCH;

FoundIt:
    if (thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) {
        pthread_mutex_unlock(&gThreadListLock);
        return EINVAL;
    }

   /* wait for thread death when needed
    *
    * if the 'join_count' is negative, this is a 'zombie' thread that
    * is already dead and without stack/TLS
    *
    * otherwise, we need to increment 'join_count' and wait to be signaled
    */
    count = thread->join_count;
    if (count >= 0) {
        thread->join_count += 1;
        pthread_cond_wait( &thread->join_cond, &gThreadListLock );
        count = --thread->join_count;
    }
    if (ret_val)
        *ret_val = thread->return_value;

    /* remove thread descriptor when we're the last joiner or when the
     * thread was already a zombie.
     */
    if (count <= 0) {
        _pthread_internal_remove_locked(thread);
        _pthread_internal_free(thread);
    }
    pthread_mutex_unlock(&gThreadListLock);
    return 0;
}

int  pthread_detach( pthread_t  thid )
{
    pthread_internal_t*  thread;
    int                  result = 0;
    int                  flags;

    pthread_mutex_lock(&gThreadListLock);
    for (thread = gThreadList; thread != NULL; thread = thread->next)
        if (thread == (pthread_internal_t*)thid)
            goto FoundIt;

    result = ESRCH;
    goto Exit;

FoundIt:
    do {
        flags = thread->attr.flags;

        if ( flags & PTHREAD_ATTR_FLAG_DETACHED ) {
            /* thread is not joinable ! */
            result = EINVAL;
            goto Exit;
        }
    }
    while ( __bionic_cmpxchg( flags, flags | PTHREAD_ATTR_FLAG_DETACHED,
                              (volatile int*)&thread->attr.flags ) != 0 );
Exit:
    pthread_mutex_unlock(&gThreadListLock);
    return result;
}

pthread_t pthread_self(void)
{
    return (pthread_t)__get_thread();
}

int pthread_equal(pthread_t one, pthread_t two)
{
    return (one == two ? 1 : 0);
}

int pthread_getschedparam(pthread_t thid, int * policy,
                          struct sched_param * param)
{
    int  old_errno = errno;

    pthread_internal_t * thread = (pthread_internal_t *)thid;
    int err = sched_getparam(thread->kernel_id, param);
    if (!err) {
        *policy = sched_getscheduler(thread->kernel_id);
    } else {
        err = errno;
        errno = old_errno;
    }
    return err;
}

int pthread_setschedparam(pthread_t thid, int policy,
                          struct sched_param const * param)
{
    pthread_internal_t * thread = (pthread_internal_t *)thid;
    int                  old_errno = errno;
    int                  ret;

    ret = sched_setscheduler(thread->kernel_id, policy, param);
    if (ret < 0) {
        ret = errno;
        errno = old_errno;
    }
    return ret;
}


/* a mutex is implemented as a 32-bit integer holding the following fields
 *
 * bits:     name     description
 * 31-16     tid      owner thread's kernel id (recursive and errorcheck only)
 * 15-14     type     mutex type
 * 13        shared   process-shared flag
 * 12-2      counter  counter of recursive mutexes
 * 1-0       state    lock state (0, 1 or 2)
 */
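
/* A minimal worked example of this encoding (illustrative values, not taken
 * from this file): a process-private recursive mutex owned by kernel tid 1234,
 * locked once with waiters, would be
 *
 *   (1234 << 16) | MUTEX_TYPE_BITS_RECURSIVE | MUTEX_STATE_BITS_LOCKED_CONTENDED
 *     = 0x04D20000 | 0x00004000 | 0x00000002
 *     = 0x04D24002
 *
 * i.e. tid = 0x4D2 (1234), type = 1 (recursive), shared = 0, counter = 0,
 * state = 2 (locked, maybe waiters), using the macros defined below.
 */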

/* Convenience macro, creates a mask of 'bits' bits that starts from
 * the 'shift'-th least significant bit in a 32-bit word.
 *
 * Examples: FIELD_MASK(0,4)  -> 0xf
 *           FIELD_MASK(16,9) -> 0x1ff0000
 */
#define  FIELD_MASK(shift,bits)           (((1 << (bits))-1) << (shift))

/* This one is used to create a bit pattern from a given field value */
#define  FIELD_TO_BITS(val,shift,bits)    (((val) & ((1 << (bits))-1)) << (shift))

/* And this one does the opposite, i.e. extract a field's value from a bit pattern */
#define  FIELD_FROM_BITS(val,shift,bits)  (((val) >> (shift)) & ((1 << (bits))-1))

/* Mutex state:
 *
 * 0 for unlocked
 * 1 for locked, no waiters
 * 2 for locked, maybe waiters
 */
#define  MUTEX_STATE_SHIFT      0
#define  MUTEX_STATE_LEN        2

#define  MUTEX_STATE_MASK           FIELD_MASK(MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
#define  MUTEX_STATE_FROM_BITS(v)   FIELD_FROM_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
#define  MUTEX_STATE_TO_BITS(v)     FIELD_TO_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)

#define  MUTEX_STATE_UNLOCKED            0   /* must be 0 to match __PTHREAD_MUTEX_INIT_VALUE */
#define  MUTEX_STATE_LOCKED_UNCONTENDED  1   /* must be 1 due to atomic dec in unlock operation */
#define  MUTEX_STATE_LOCKED_CONTENDED    2   /* must be 1 + LOCKED_UNCONTENDED due to atomic dec */

#define  MUTEX_STATE_BITS_UNLOCKED            MUTEX_STATE_TO_BITS(MUTEX_STATE_UNLOCKED)
#define  MUTEX_STATE_BITS_LOCKED_UNCONTENDED  MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_UNCONTENDED)
#define  MUTEX_STATE_BITS_LOCKED_CONTENDED    MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_CONTENDED)

/* return true iff the mutex is locked with no waiters */
#define  MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(v)  (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_UNCONTENDED)

/* return true iff the mutex is locked with maybe waiters */
#define  MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(v)   (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_CONTENDED)

/* used to flip from LOCKED_UNCONTENDED to LOCKED_CONTENDED */
#define  MUTEX_STATE_BITS_FLIP_CONTENTION(v)      ((v) ^ (MUTEX_STATE_BITS_LOCKED_CONTENDED ^ MUTEX_STATE_BITS_LOCKED_UNCONTENDED))

/* Mutex counter:
 *
 * We need to check for overflow before incrementing, and we also need to
 * detect when the counter is 0
 */
#define  MUTEX_COUNTER_SHIFT         2
#define  MUTEX_COUNTER_LEN           11
#define  MUTEX_COUNTER_MASK          FIELD_MASK(MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN)

#define  MUTEX_COUNTER_BITS_WILL_OVERFLOW(v)    (((v) & MUTEX_COUNTER_MASK) == MUTEX_COUNTER_MASK)
#define  MUTEX_COUNTER_BITS_IS_ZERO(v)          (((v) & MUTEX_COUNTER_MASK) == 0)

/* Used to increment the counter directly after overflow has been checked */
#define  MUTEX_COUNTER_BITS_ONE      FIELD_TO_BITS(1,MUTEX_COUNTER_SHIFT,MUTEX_COUNTER_LEN)

/* Returns true iff the counter is 0 */
#define  MUTEX_COUNTER_BITS_ARE_ZERO(v)  (((v) & MUTEX_COUNTER_MASK) == 0)

/* Mutex shared bit flag
 *
 * This flag is set to indicate that the mutex is shared among processes.
 * This changes the futex opcode we use for futex wait/wake operations
 * (non-shared operations are much faster).
 */
#define  MUTEX_SHARED_SHIFT    13
#define  MUTEX_SHARED_MASK     FIELD_MASK(MUTEX_SHARED_SHIFT,1)

/* Mutex type:
 *
 * We support normal, recursive and errorcheck mutexes.
 *
 * The constants defined here *cannot* be changed because they must match
 * the C library ABI which defines the following initialization values in
 * <pthread.h>:
 *
 *   __PTHREAD_MUTEX_INIT_VALUE
 *   __PTHREAD_RECURSIVE_MUTEX_VALUE
 *   __PTHREAD_ERRORCHECK_MUTEX_INIT_VALUE
 */
#define  MUTEX_TYPE_SHIFT      14
#define  MUTEX_TYPE_LEN        2
#define  MUTEX_TYPE_MASK       FIELD_MASK(MUTEX_TYPE_SHIFT,MUTEX_TYPE_LEN)

#define  MUTEX_TYPE_NORMAL          0  /* Must be 0 to match __PTHREAD_MUTEX_INIT_VALUE */
#define  MUTEX_TYPE_RECURSIVE       1
#define  MUTEX_TYPE_ERRORCHECK      2

#define  MUTEX_TYPE_TO_BITS(t)       FIELD_TO_BITS(t, MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)

#define  MUTEX_TYPE_BITS_NORMAL      MUTEX_TYPE_TO_BITS(MUTEX_TYPE_NORMAL)
#define  MUTEX_TYPE_BITS_RECURSIVE   MUTEX_TYPE_TO_BITS(MUTEX_TYPE_RECURSIVE)
#define  MUTEX_TYPE_BITS_ERRORCHECK  MUTEX_TYPE_TO_BITS(MUTEX_TYPE_ERRORCHECK)

/* Mutex owner field:
 *
 * This is only used for recursive and errorcheck mutexes. It holds the
 * kernel TID of the owning thread. Note that this works because the Linux
 * kernel _only_ uses 16-bit values for thread ids.
 *
 * More specifically, it will wrap to 10000 when it reaches over 32768 for
 * application processes. You can check this by running the following inside
 * an adb shell session:
 *
    OLDPID=$$;
    while true; do
    NEWPID=$(sh -c 'echo $$')
    if [ "$NEWPID" -gt 32768 ]; then
        echo "AARGH: new PID $NEWPID is too high!"
        exit 1
    fi
    if [ "$NEWPID" -lt "$OLDPID" ]; then
        echo "****** Wrapping from PID $OLDPID to $NEWPID. *******"
    else
        echo -n "$NEWPID!"
    fi
    OLDPID=$NEWPID
    done

 * Note that you can run the same example on a desktop Linux system;
 * the wrapping will also happen at 32768, but will go back to 300 instead.
 */
#define  MUTEX_OWNER_SHIFT     16
#define  MUTEX_OWNER_LEN       16

#define  MUTEX_OWNER_FROM_BITS(v)    FIELD_FROM_BITS(v,MUTEX_OWNER_SHIFT,MUTEX_OWNER_LEN)
#define  MUTEX_OWNER_TO_BITS(v)      FIELD_TO_BITS(v,MUTEX_OWNER_SHIFT,MUTEX_OWNER_LEN)

/* Convenience macros.
 *
 * These are used to form or modify the bit pattern of a given mutex value
 */


/* a mutex attribute holds the following fields
 *
 * bits:     name       description
 * 0-3       type       type of mutex
 * 4         shared     process-shared flag
 */
#define  MUTEXATTR_TYPE_MASK   0x000f
#define  MUTEXATTR_SHARED_MASK 0x0010


int pthread_mutexattr_init(pthread_mutexattr_t *attr)
{
    if (attr) {
        *attr = PTHREAD_MUTEX_DEFAULT;
        return 0;
    } else {
        return EINVAL;
    }
}

int pthread_mutexattr_destroy(pthread_mutexattr_t *attr)
{
    if (attr) {
        *attr = -1;
        return 0;
    } else {
        return EINVAL;
    }
}

int pthread_mutexattr_gettype(const pthread_mutexattr_t *attr, int *type)
{
    if (attr) {
        int  atype = (*attr & MUTEXATTR_TYPE_MASK);

        if (atype >= PTHREAD_MUTEX_NORMAL &&
            atype <= PTHREAD_MUTEX_ERRORCHECK) {
            *type = atype;
            return 0;
        }
    }
    return EINVAL;
}

int pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type)
{
    if (attr && type >= PTHREAD_MUTEX_NORMAL &&
                type <= PTHREAD_MUTEX_ERRORCHECK ) {
        *attr = (*attr & ~MUTEXATTR_TYPE_MASK) | type;
        return 0;
    }
    return EINVAL;
}

/* process-shared mutexes are not supported at the moment */

int pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int  pshared)
{
    if (!attr)
        return EINVAL;

    switch (pshared) {
    case PTHREAD_PROCESS_PRIVATE:
        *attr &= ~MUTEXATTR_SHARED_MASK;
        return 0;

    case PTHREAD_PROCESS_SHARED:
        /* our current implementation of pthread actually supports shared
         * mutexes but won't clean up if a process dies with the mutex held.
         * Nevertheless, it's better than nothing. Shared mutexes are used
         * by surfaceflinger and audioflinger.
         */
        *attr |= MUTEXATTR_SHARED_MASK;
        return 0;
    }
    return EINVAL;
}

int pthread_mutexattr_getpshared(pthread_mutexattr_t *attr, int *pshared)
{
    if (!attr || !pshared)
        return EINVAL;

    *pshared = (*attr & MUTEXATTR_SHARED_MASK) ? PTHREAD_PROCESS_SHARED
                                               : PTHREAD_PROCESS_PRIVATE;
    return 0;
}

int pthread_mutex_init(pthread_mutex_t *mutex,
                       const pthread_mutexattr_t *attr)
{
    int value = 0;

    if (mutex == NULL)
        return EINVAL;

    if (__likely(attr == NULL)) {
        mutex->value = MUTEX_TYPE_BITS_NORMAL;
        return 0;
    }

    if ((*attr & MUTEXATTR_SHARED_MASK) != 0)
        value |= MUTEX_SHARED_MASK;

    switch (*attr & MUTEXATTR_TYPE_MASK) {
    case PTHREAD_MUTEX_NORMAL:
        value |= MUTEX_TYPE_BITS_NORMAL;
        break;
    case PTHREAD_MUTEX_RECURSIVE:
        value |= MUTEX_TYPE_BITS_RECURSIVE;
        break;
    case PTHREAD_MUTEX_ERRORCHECK:
        value |= MUTEX_TYPE_BITS_ERRORCHECK;
        break;
    default:
        return EINVAL;
    }

    mutex->value = value;
    return 0;
}
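
/* Example: a hypothetical caller creating a process-private recursive mutex
 * through the attribute API above (illustrative, not code from this file).
 *
 *   pthread_mutexattr_t mattr;
 *   pthread_mutex_t m;
 *   pthread_mutexattr_init(&mattr);
 *   pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
 *   pthread_mutex_init(&m, &mattr);        // m.value now has MUTEX_TYPE_BITS_RECURSIVE set
 *   pthread_mutexattr_destroy(&mattr);
 *
 *   pthread_mutex_lock(&m);
 *   pthread_mutex_lock(&m);                // same thread: counter is incremented, no deadlock
 *   pthread_mutex_unlock(&m);
 *   pthread_mutex_unlock(&m);
 *   pthread_mutex_destroy(&m);
 */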


/*
 * Lock a non-recursive mutex.
 *
 * As noted above, there are three states:
 *   0 (unlocked, no contention)
 *   1 (locked, no contention)
 *   2 (locked, contention)
 *
 * Non-recursive mutexes don't use the thread-id or counter fields, and the
 * "type" value is zero, so the only bits that will be set are the ones in
 * the lock state field.
 */
static __inline__ void
_normal_lock(pthread_mutex_t*  mutex, int shared)
{
    /* convenience shortcuts */
    const int unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
    const int locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
    /*
     * The common case is an unlocked mutex, so we begin by trying to
     * change the lock's state from 0 (UNLOCKED) to 1 (LOCKED).
     * __bionic_cmpxchg() returns 0 if it made the swap successfully.
     * If the result is nonzero, this lock is already held by another thread.
     */
    if (__bionic_cmpxchg(unlocked, locked_uncontended, &mutex->value) != 0) {
        const int locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
        /*
         * We want to go to sleep until the mutex is available, which
         * requires promoting it to state 2 (CONTENDED). We need to
         * swap in the new state value and then wait until somebody wakes us up.
         *
         * __bionic_swap() returns the previous value.  We swap 2 in and
         * see if we got zero back; if so, we have acquired the lock.  If
         * not, another thread still holds the lock and we wait again.
         *
         * The second argument to the __futex_wait() call is compared
         * against the current value.  If it doesn't match, __futex_wait()
         * returns immediately (otherwise, it sleeps for a time specified
         * by the third argument; 0 means sleep forever).  This ensures
         * that the mutex is in state 2 when we go to sleep on it, which
         * guarantees a wake-up call.
         */
        while (__bionic_swap(locked_contended, &mutex->value) != unlocked)
            __futex_wait_ex(&mutex->value, shared, locked_contended, 0);
    }
    ANDROID_MEMBAR_FULL();
}

/*
 * Release a non-recursive mutex.  The caller is responsible for determining
 * that we are in fact the owner of this lock.
 */
static __inline__ void
_normal_unlock(pthread_mutex_t*  mutex, int shared)
{
    ANDROID_MEMBAR_FULL();

    /*
     * The mutex state will be 1 or (rarely) 2.  We use an atomic decrement
     * to release the lock.  __bionic_atomic_dec() returns the previous value;
     * if it wasn't 1 we have to do some additional work.
     */
    if (__bionic_atomic_dec(&mutex->value) != (shared|MUTEX_STATE_BITS_LOCKED_UNCONTENDED)) {
        /*
         * Start by releasing the lock.  The decrement changed it from
         * "contended lock" to "uncontended lock", which means we still
         * hold it, and anybody who tries to sneak in will push it back
         * to state 2.
         *
         * Once we set it to zero the lock is up for grabs.  We follow
         * this with a __futex_wake() to ensure that one of the waiting
         * threads has a chance to grab it.
         *
         * This doesn't cause a race with the swap/wait pair in
         * _normal_lock(), because the __futex_wait() call there will
         * return immediately if the mutex value isn't 2.
         */
        mutex->value = shared;

        /*
         * Wake up one waiting thread.  We don't know which thread will be
         * woken or when it'll start executing -- futexes make no guarantees
         * here.  There may not even be a thread waiting.
         *
         * The newly-woken thread will replace the 0 we just set above
         * with 2, which means that when it eventually releases the mutex
         * it will also call FUTEX_WAKE.  This results in one extra wake
         * call whenever a lock is contended, but lets us avoid forgetting
         * anyone without requiring us to track the number of sleepers.
         *
         * It's possible for another thread to sneak in and grab the lock
         * between the zero assignment above and the wake call below.  If
         * the new thread is "slow" and holds the lock for a while, we'll
         * wake up a sleeper, which will swap in a 2 and then go back to
         * sleep since the lock is still held.  If the new thread is "fast",
         * running to completion before we call wake, the thread we
         * eventually wake will find an unlocked mutex and will execute.
         * Either way we have correct behavior and nobody is orphaned on
         * the wait queue.
         */
        __futex_wake_ex(&mutex->value, shared, 1);
    }
}

/* This common inlined function is used to increment the counter of an
 * errorcheck or recursive mutex.
 *
 * For errorcheck mutexes, it will return EDEADLK.
 * If the counter overflows, it will return EAGAIN.
 * Otherwise, it atomically increments the counter and returns 0
 * after providing an acquire barrier.
 *
 * mtype is the current mutex type
 * mvalue is the current mutex value (already loaded)
 * mutex points to the mutex.
 */
static __inline__ __attribute__((always_inline)) int
_recursive_increment(pthread_mutex_t* mutex, int mvalue, int mtype)
{
    if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
        /* trying to re-lock a mutex we already acquired */
        return EDEADLK;
    }

    /* Detect recursive lock overflow and return EAGAIN.
     * This is safe because only the owner thread can modify the
     * counter bits in the mutex value.
     */
    if (MUTEX_COUNTER_BITS_WILL_OVERFLOW(mvalue)) {
        return EAGAIN;
    }

    /* We own the mutex, but other threads are able to change
     * the lower bits (e.g. promoting it to "contended"), so we
     * need to use an atomic cmpxchg loop to update the counter.
     */
    for (;;) {
        /* increment counter, overflow was already checked */
        int newval = mvalue + MUTEX_COUNTER_BITS_ONE;
        if (__likely(__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0)) {
            /* mutex is still locked, no need for a memory barrier */
            return 0;
        }
        /* the value was changed, this happens when another thread changes
         * the lower state bits from 1 to 2 to indicate contention. This
         * cannot change the counter, so simply reload and try again.
         */
        mvalue = mutex->value;
    }
}

__LIBC_HIDDEN__
int pthread_mutex_lock_impl(pthread_mutex_t *mutex)
{
    int mvalue, mtype, tid, new_lock_type, shared;

    if (__unlikely(mutex == NULL))
        return EINVAL;

    mvalue = mutex->value;
    mtype = (mvalue & MUTEX_TYPE_MASK);
    shared = (mvalue & MUTEX_SHARED_MASK);

    /* Handle normal case first */
    if ( __likely(mtype == MUTEX_TYPE_BITS_NORMAL) ) {
        _normal_lock(mutex, shared);
        return 0;
    }

    /* Do we already own this recursive or error-check mutex ? */
    tid = __get_thread()->kernel_id;
    if ( tid == MUTEX_OWNER_FROM_BITS(mvalue) )
        return _recursive_increment(mutex, mvalue, mtype);

    /* Add in shared state to avoid extra 'or' operations below */
    mtype |= shared;

    /* First, if the mutex is unlocked, try to quickly acquire it.
     * In the optimistic case where this works, set the state to 1 to
     * indicate locked with no contention */
    if (mvalue == mtype) {
        int newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
        if (__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0) {
            ANDROID_MEMBAR_FULL();
            return 0;
        }
        /* argh, the value changed, reload before entering the loop */
        mvalue = mutex->value;
    }

    for (;;) {
        int newval;

        /* if the mutex is unlocked, its value should be 'mtype' and
         * we try to acquire it by setting its owner and state atomically.
         * NOTE: We put the state to 2 since we _know_ there is contention
         * when we are in this loop. This ensures all waiters will be
         * woken up when we eventually unlock.
         */
        if (mvalue == mtype) {
            newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_CONTENDED;
            /* TODO: Change this to __bionic_cmpxchg_acquire when we
             *        implement it to get rid of the explicit memory
             *        barrier below.
             */
            if (__unlikely(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
                mvalue = mutex->value;
                continue;
            }
            ANDROID_MEMBAR_FULL();
            return 0;
        }

        /* the mutex is already locked by another thread, if its state is 1
         * we will change it to 2 to indicate contention. */
        if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(mvalue)) {
            newval = MUTEX_STATE_BITS_FLIP_CONTENTION(mvalue); /* locked state 1 => state 2 */
            if (__unlikely(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
                mvalue = mutex->value;
                continue;
            }
            mvalue = newval;
        }

        /* wait until the mutex is unlocked */
        __futex_wait_ex(&mutex->value, shared, mvalue, NULL);

        mvalue = mutex->value;
    }
    /* NOTREACHED */
}

int pthread_mutex_lock(pthread_mutex_t *mutex)
{
    int err = pthread_mutex_lock_impl(mutex);
#ifdef PTHREAD_DEBUG
    if (PTHREAD_DEBUG_ENABLED) {
        if (!err) {
            pthread_debug_mutex_lock_check(mutex);
        }
    }
#endif
    return err;
}

__LIBC_HIDDEN__
int pthread_mutex_unlock_impl(pthread_mutex_t *mutex)
{
    int mvalue, mtype, tid, oldv, shared;

    if (__unlikely(mutex == NULL))
        return EINVAL;

    mvalue = mutex->value;
    mtype  = (mvalue & MUTEX_TYPE_MASK);
    shared = (mvalue & MUTEX_SHARED_MASK);

    /* Handle common case first */
    if (__likely(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        _normal_unlock(mutex, shared);
        return 0;
    }

    /* Do we already own this recursive or error-check mutex ? */
    tid = __get_thread()->kernel_id;
    if ( tid != MUTEX_OWNER_FROM_BITS(mvalue) )
        return EPERM;

    /* If the counter is > 0, we can simply decrement it atomically.
     * Since other threads can mutate the lower state bits (and only the
     * lower state bits), use a cmpxchg to do it.
     */
    if (!MUTEX_COUNTER_BITS_IS_ZERO(mvalue)) {
        for (;;) {
            int newval = mvalue - MUTEX_COUNTER_BITS_ONE;
            if (__likely(__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0)) {
                /* success: we still own the mutex, so no memory barrier */
                return 0;
            }
            /* the value changed, so reload and loop */
            mvalue = mutex->value;
        }
    }

    /* the counter is 0, so we're going to unlock the mutex by resetting
     * its value to 'unlocked'. We need to perform a swap in order
     * to read the current state, which will be 2 if there are waiters
     * to awake.
     *
     * TODO: Change this to __bionic_swap_release when we implement it
     *        to get rid of the explicit memory barrier below.
     */
    ANDROID_MEMBAR_FULL();  /* RELEASE BARRIER */
    mvalue = __bionic_swap(mtype | shared | MUTEX_STATE_BITS_UNLOCKED, &mutex->value);

    /* Wake one waiting thread, if any */
    if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(mvalue)) {
        __futex_wake_ex(&mutex->value, shared, 1);
    }
    return 0;
}

int pthread_mutex_unlock(pthread_mutex_t *mutex)
{
#ifdef PTHREAD_DEBUG
    if (PTHREAD_DEBUG_ENABLED) {
        pthread_debug_mutex_unlock_check(mutex);
    }
#endif
    return pthread_mutex_unlock_impl(mutex);
}

__LIBC_HIDDEN__
int pthread_mutex_trylock_impl(pthread_mutex_t *mutex)
{
    int mvalue, mtype, tid, oldv, shared;

    if (__unlikely(mutex == NULL))
        return EINVAL;

    mvalue = mutex->value;
    mtype  = (mvalue & MUTEX_TYPE_MASK);
    shared = (mvalue & MUTEX_SHARED_MASK);

    /* Handle common case first */
    if ( __likely(mtype == MUTEX_TYPE_BITS_NORMAL) )
    {
        if (__bionic_cmpxchg(shared|MUTEX_STATE_BITS_UNLOCKED,
                             shared|MUTEX_STATE_BITS_LOCKED_UNCONTENDED,
                             &mutex->value) == 0) {
            ANDROID_MEMBAR_FULL();
            return 0;
        }

        return EBUSY;
    }

    /* Do we already own this recursive or error-check mutex ? */
    tid = __get_thread()->kernel_id;
    if ( tid == MUTEX_OWNER_FROM_BITS(mvalue) )
        return _recursive_increment(mutex, mvalue, mtype);

    /* Same as pthread_mutex_lock, except that we don't want to wait, and
     * the only operation that can succeed is a single cmpxchg to acquire the
     * lock if it is released / not owned by anyone. No need for a complex loop.
     */
    mtype |= shared | MUTEX_STATE_BITS_UNLOCKED;
    mvalue = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;

    if (__likely(__bionic_cmpxchg(mtype, mvalue, &mutex->value) == 0)) {
        ANDROID_MEMBAR_FULL();
        return 0;
    }

    return EBUSY;
}

int pthread_mutex_trylock(pthread_mutex_t *mutex)
{
    int err = pthread_mutex_trylock_impl(mutex);
#ifdef PTHREAD_DEBUG
    if (PTHREAD_DEBUG_ENABLED) {
        if (!err) {
            pthread_debug_mutex_lock_check(mutex);
        }
    }
#endif
    return err;
}

/* initialize 'ts' with the difference between 'abstime' and the current time
 * according to 'clock'. Returns -1 if abstime already expired, or 0 otherwise.
 */
static int
__timespec_to_absolute(struct timespec*  ts, const struct timespec*  abstime, clockid_t  clock)
{
    clock_gettime(clock, ts);
    ts->tv_sec  = abstime->tv_sec - ts->tv_sec;
    ts->tv_nsec = abstime->tv_nsec - ts->tv_nsec;
    if (ts->tv_nsec < 0) {
        ts->tv_sec--;
        ts->tv_nsec += 1000000000;
    }
    if ((ts->tv_nsec < 0) || (ts->tv_sec < 0))
        return -1;

    return 0;
}

/* initialize 'abstime' to the current time according to 'clock' plus 'msecs'
 * milliseconds.
 */
static void
__timespec_to_relative_msec(struct timespec*  abstime, unsigned  msecs, clockid_t  clock)
{
    clock_gettime(clock, abstime);
    abstime->tv_sec  += msecs/1000;
    abstime->tv_nsec += (msecs%1000)*1000000;
    if (abstime->tv_nsec >= 1000000000) {
        abstime->tv_sec++;
        abstime->tv_nsec -= 1000000000;
    }
}
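
/* Worked example of the two helpers above (illustrative numbers): with the
 * current CLOCK_MONOTONIC time at 100.900000000s,
 *
 *   __timespec_to_relative_msec(&abstime, 250, clock)
 *     -> abstime = { tv_sec = 101, tv_nsec = 150000000 }   // 100.9s + 0.25s, nsec carried into sec
 *
 * and, a moment later at 100.950000000s,
 *
 *   __timespec_to_absolute(&ts, &abstime, clock)
 *     -> ts = { tv_sec = 0, tv_nsec = 200000000 }          // 0.2s left before the deadline
 *
 * and returns 0; once the deadline has passed it returns -1 instead.
 */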

__LIBC_HIDDEN__
int pthread_mutex_lock_timeout_np_impl(pthread_mutex_t *mutex, unsigned msecs)
{
    clockid_t        clock = CLOCK_MONOTONIC;
    struct timespec  abstime;
    struct timespec  ts;
    int               mvalue, mtype, tid, oldv, new_lock_type, shared;

    /* compute absolute expiration time */
    __timespec_to_relative_msec(&abstime, msecs, clock);

    if (__unlikely(mutex == NULL))
        return EINVAL;

    mvalue = mutex->value;
    mtype  = (mvalue & MUTEX_TYPE_MASK);
    shared = (mvalue & MUTEX_SHARED_MASK);

    /* Handle common case first */
    if ( __likely(mtype == MUTEX_TYPE_BITS_NORMAL) )
    {
        const int unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
        const int locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
        const int locked_contended   = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

        /* fast path for uncontended lock. Note: MUTEX_TYPE_BITS_NORMAL is 0 */
        if (__bionic_cmpxchg(unlocked, locked_uncontended, &mutex->value) == 0) {
            ANDROID_MEMBAR_FULL();
            return 0;
        }

        /* loop while needed */
        while (__bionic_swap(locked_contended, &mutex->value) != unlocked) {
            if (__timespec_to_absolute(&ts, &abstime, clock) < 0)
                return EBUSY;

            __futex_wait_ex(&mutex->value, shared, locked_contended, &ts);
        }
        ANDROID_MEMBAR_FULL();
        return 0;
    }

    /* Do we already own this recursive or error-check mutex ? */
    tid = __get_thread()->kernel_id;
    if ( tid == MUTEX_OWNER_FROM_BITS(mvalue) )
        return _recursive_increment(mutex, mvalue, mtype);

    /* the following implements the same loop as pthread_mutex_lock_impl
     * but adds checks to ensure that the operation never exceeds the
     * absolute expiration time.
     */
    mtype |= shared;

    /* first try a quick lock */
    if (mvalue == mtype) {
        mvalue = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
        if (__likely(__bionic_cmpxchg(mtype, mvalue, &mutex->value) == 0)) {
            ANDROID_MEMBAR_FULL();
            return 0;
        }
        mvalue = mutex->value;
    }

    for (;;) {
        struct timespec ts;

        /* if the value is 'unlocked', try to acquire it directly */
        /* NOTE: put state to 2 since we know there is contention */
        if (mvalue == mtype) /* unlocked */ {
            mvalue = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_CONTENDED;
            if (__bionic_cmpxchg(mtype, mvalue, &mutex->value) == 0) {
                ANDROID_MEMBAR_FULL();
                return 0;
            }
            /* the value changed before we could lock it. We need to check
             * the time to avoid livelocks, reload the value, then loop again. */
            if (__timespec_to_absolute(&ts, &abstime, clock) < 0)
                return EBUSY;

            mvalue = mutex->value;
            continue;
        }

        /* The value is locked. If 'uncontended', try to switch its state
         * to 'contended' to ensure we get woken up later. */
        if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(mvalue)) {
            int newval = MUTEX_STATE_BITS_FLIP_CONTENTION(mvalue);
            if (__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0) {
                /* this failed because the value changed, reload it */
                mvalue = mutex->value;
            } else {
                /* this succeeded, update mvalue */
                mvalue = newval;
            }
        }

        /* check time and update 'ts' */
        if (__timespec_to_absolute(&ts, &abstime, clock) < 0)
            return EBUSY;

        /* Only wait to be woken up if the state is '2', otherwise we'll
         * simply loop right now. This can happen when the second cmpxchg
         * in our loop failed because the mutex was unlocked by another
         * thread.
         */
        if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(mvalue)) {
            if (__futex_wait_ex(&mutex->value, shared, mvalue, &ts) == ETIMEDOUT) {
                return EBUSY;
            }
            mvalue = mutex->value;
        }
    }
    /* NOTREACHED */
}

int pthread_mutex_lock_timeout_np(pthread_mutex_t *mutex, unsigned msecs)
{
    int err = pthread_mutex_lock_timeout_np_impl(mutex, msecs);
#ifdef PTHREAD_DEBUG
    if (PTHREAD_DEBUG_ENABLED) {
        if (!err) {
            pthread_debug_mutex_lock_check(mutex);
        }
    }
#endif
    return err;
}

int pthread_mutex_destroy(pthread_mutex_t *mutex)
{
    int ret;

    /* use trylock to ensure that the mutex value is
     * valid and is not already locked. */
    ret = pthread_mutex_trylock_impl(mutex);
    if (ret != 0)
        return ret;

    mutex->value = 0xdead10cc;
    return 0;
}



int pthread_condattr_init(pthread_condattr_t *attr)
{
    if (attr == NULL)
        return EINVAL;

    *attr = PTHREAD_PROCESS_PRIVATE;
    return 0;
}

int pthread_condattr_getpshared(pthread_condattr_t *attr, int *pshared)
{
    if (attr == NULL || pshared == NULL)
        return EINVAL;

    *pshared = *attr;
    return 0;
}

int pthread_condattr_setpshared(pthread_condattr_t *attr, int pshared)
{
    if (attr == NULL)
        return EINVAL;

    if (pshared != PTHREAD_PROCESS_SHARED &&
        pshared != PTHREAD_PROCESS_PRIVATE)
        return EINVAL;

    *attr = pshared;
    return 0;
}

int pthread_condattr_destroy(pthread_condattr_t *attr)
{
    if (attr == NULL)
        return EINVAL;

    *attr = 0xdeada11d;
    return 0;
}

/* We use one bit in condition variable values as the 'shared' flag;
 * the rest is a counter.
 */
#define COND_SHARED_MASK        0x0001
#define COND_COUNTER_INCREMENT  0x0002
#define COND_COUNTER_MASK       (~COND_SHARED_MASK)

#define COND_IS_SHARED(c)  (((c)->value & COND_SHARED_MASK) != 0)

/* XXX *technically* there is a race condition that could allow
 * XXX a signal to be missed.  If thread A is preempted in _wait()
 * XXX after unlocking the mutex and before waiting, and if other
 * XXX threads call signal or broadcast UINT_MAX/2 times (exactly),
 * XXX before thread A is scheduled again and calls futex_wait(),
 * XXX then the signal will be lost.
 */

int pthread_cond_init(pthread_cond_t *cond,
                      const pthread_condattr_t *attr)
{
    if (cond == NULL)
        return EINVAL;

    cond->value = 0;

    if (attr != NULL && *attr == PTHREAD_PROCESS_SHARED)
        cond->value |= COND_SHARED_MASK;

    return 0;
}

int pthread_cond_destroy(pthread_cond_t *cond)
{
    if (cond == NULL)
        return EINVAL;

    cond->value = 0xdeadc04d;
    return 0;
}
1650
1651/* This function is used by pthread_cond_broadcast and
1652 * pthread_cond_signal to atomically decrement the counter and
1653 * then wake up 'counter' threads.
1654 */
1655static int
1656__pthread_cond_pulse(pthread_cond_t *cond, int  counter)
1657{
1658    long flags;
1659
1660    if (__unlikely(cond == NULL))
1661        return EINVAL;
1662
1663    flags = (cond->value & ~COND_COUNTER_MASK);
1664    for (;;) {
1665        long oldval = cond->value;
1666        long newval = ((oldval - COND_COUNTER_INCREMENT) & COND_COUNTER_MASK)
1667                      | flags;
1668        if (__bionic_cmpxchg(oldval, newval, &cond->value) == 0)
1669            break;
1670    }
1671
1672    /*
1673     * Ensure that all memory accesses previously made by this thread are
1674     * visible to the woken thread(s).  On the other side, the "wait"
1675     * code will issue any necessary barriers when locking the mutex.
1676     *
1677     * This may not strictly be necessary -- if the caller follows
1678     * recommended practice and holds the mutex before signaling the cond
1679     * var, the mutex ops will provide correct semantics.  If they don't
1680     * hold the mutex, they're subject to race conditions anyway.
1681     */
1682    ANDROID_MEMBAR_FULL();
1683
1684    __futex_wake_ex(&cond->value, COND_IS_SHARED(cond), counter);
1685    return 0;
1686}
1687
1688int pthread_cond_broadcast(pthread_cond_t *cond)
1689{
1690    return __pthread_cond_pulse(cond, INT_MAX);
1691}
1692
1693int pthread_cond_signal(pthread_cond_t *cond)
1694{
1695    return __pthread_cond_pulse(cond, 1);
1696}
1697
1698int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
1699{
1700    return pthread_cond_timedwait(cond, mutex, NULL);
1701}
1702
1703int __pthread_cond_timedwait_relative(pthread_cond_t *cond,
1704                                      pthread_mutex_t * mutex,
1705                                      const struct timespec *reltime)
1706{
1707    int  status;
1708    int  oldvalue = cond->value;
1709
1710    pthread_mutex_unlock(mutex);
1711    status = __futex_wait_ex(&cond->value, COND_IS_SHARED(cond), oldvalue, reltime);
1712    pthread_mutex_lock(mutex);
1713
1714    if (status == (-ETIMEDOUT)) return ETIMEDOUT;
1715    return 0;
1716}
1717
1718int __pthread_cond_timedwait(pthread_cond_t *cond,
1719                             pthread_mutex_t * mutex,
1720                             const struct timespec *abstime,
1721                             clockid_t clock)
1722{
1723    struct timespec ts;
1724    struct timespec * tsp;
1725
1726    if (abstime != NULL) {
1727        if (__timespec_to_absolute(&ts, abstime, clock) < 0)
1728            return ETIMEDOUT;
1729        tsp = &ts;
1730    } else {
1731        tsp = NULL;
1732    }
1733
1734    return __pthread_cond_timedwait_relative(cond, mutex, tsp);
1735}
1736
1737int pthread_cond_timedwait(pthread_cond_t *cond,
1738                           pthread_mutex_t * mutex,
1739                           const struct timespec *abstime)
1740{
1741    return __pthread_cond_timedwait(cond, mutex, abstime, CLOCK_REALTIME);
1742}
1743
1744
1745/* this one exists only for backward binary compatibility */
1746int pthread_cond_timedwait_monotonic(pthread_cond_t *cond,
1747                                     pthread_mutex_t * mutex,
1748                                     const struct timespec *abstime)
1749{
1750    return __pthread_cond_timedwait(cond, mutex, abstime, CLOCK_MONOTONIC);
1751}
1752
1753int pthread_cond_timedwait_monotonic_np(pthread_cond_t *cond,
1754                                     pthread_mutex_t * mutex,
1755                                     const struct timespec *abstime)
1756{
1757    return __pthread_cond_timedwait(cond, mutex, abstime, CLOCK_MONOTONIC);
1758}
1759
1760int pthread_cond_timedwait_relative_np(pthread_cond_t *cond,
1761                                      pthread_mutex_t * mutex,
1762                                      const struct timespec *reltime)
1763{
1764    return __pthread_cond_timedwait_relative(cond, mutex, reltime);
1765}
1766
1767int pthread_cond_timeout_np(pthread_cond_t *cond,
1768                            pthread_mutex_t * mutex,
1769                            unsigned msecs)
1770{
1771    struct timespec ts;
1772
1773    ts.tv_sec = msecs / 1000;
1774    ts.tv_nsec = (msecs % 1000) * 1000000;
1775
1776    return __pthread_cond_timedwait_relative(cond, mutex, &ts);
1777}
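
/* Usage sketch (illustrative only, not part of this library): the standard
 * predicate loop callers are expected to wrap around the wait functions above;
 * it absorbs spurious and stolen wakeups, which this implementation can
 * produce. All names are hypothetical.
 *
 *   #include <pthread.h>
 *
 *   static pthread_mutex_t  g_lock  = PTHREAD_MUTEX_INITIALIZER;
 *   static pthread_cond_t   g_cond  = PTHREAD_COND_INITIALIZER;
 *   static int              g_ready = 0;
 *
 *   void wait_until_ready(void) {
 *       pthread_mutex_lock(&g_lock);
 *       while (!g_ready)
 *           pthread_cond_wait(&g_cond, &g_lock);
 *       pthread_mutex_unlock(&g_lock);
 *   }
 *
 *   void mark_ready(void) {
 *       pthread_mutex_lock(&g_lock);
 *       g_ready = 1;
 *       pthread_cond_signal(&g_cond);
 *       pthread_mutex_unlock(&g_lock);
 *   }
 */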
1778
1779
1780
1781/* A technical note regarding our thread-local-storage (TLS) implementation:
1782 *
1783 * There can be up to TLSMAP_SIZE independent TLS keys in a given process,
1784 * though the first TLSMAP_START keys are reserved for Bionic to hold
1785 * special thread-specific variables like errno or a pointer to
1786 * the current thread's descriptor.
1787 *
1788 * while stored in the TLS area, these reserved entries cannot be accessed
1789 * through pthread_getspecific() / pthread_setspecific() or pthread_key_delete().
1790 *
1791 * also, some entries in the key table are pre-allocated (see tlsmap_lock)
1792 * to greatly simplify and speed up some OpenGL-related operations, though the
1793 * initial value will be NULL on all threads.
1794 *
1795 * you can use pthread_getspecific()/setspecific() on these, and in theory
1796 * you could also call pthread_key_delete() as well, though this would
1797 * probably break some apps.
1798 *
1799 * The 'tlsmap_t' type defined below implements a shared global map of
1800 * currently created/allocated TLS keys and the destructors associated
1801 * with them. You should use tlsmap_lock/unlock to access it to avoid
1802 * any race condition.
1803 *
1804 * the global TLS map simply contains a bitmap of allocated keys, and
1805 * an array of destructors.
1806 *
1807 * each thread has a TLS area that is a simple array of TLSMAP_SIZE void*
1808 * pointers. the TLS area of the main thread is stack-allocated in
1809 * __libc_init_common, while the TLS area of other threads is placed at
1810 * the top of their stack in pthread_create.
1811 *
1812 * when pthread_key_create() is called, it finds the first free key in the
1813 * bitmap, sets its bit to 1, and records the destructor alongside it.
1814 *
1815 * when pthread_key_delete() is called, it will erase the key's bitmap bit
1816 * and its destructor, and will also clear the key data in the TLS area of
1817 * all created threads. As mandated by POSIX, it is the responsibility of
1818 * the caller of pthread_key_delete() to properly reclaim the objects that
1819 * were pointed to by these data fields (either before or after the call).
1820 *
1821 */
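
/* Usage sketch (illustrative only, not part of this library): typical use of
 * the TLS key API described above. The buffer and functions are hypothetical.
 *
 *   #include <pthread.h>
 *   #include <stdlib.h>
 *
 *   static pthread_key_t  g_buffer_key;
 *
 *   static void buffer_dtor(void* ptr) {
 *       free(ptr);                        // called at thread exit if non-NULL
 *   }
 *
 *   void buffers_init(void) {
 *       pthread_key_create(&g_buffer_key, buffer_dtor);
 *   }
 *
 *   char* get_thread_buffer(void) {
 *       char* buf = pthread_getspecific(g_buffer_key);
 *       if (buf == NULL) {
 *           buf = malloc(256);
 *           pthread_setspecific(g_buffer_key, buf);
 *       }
 *       return buf;
 *   }
 */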
1822
1823/* TLS Map implementation
1824 */
1825
1826#define TLSMAP_START      (TLS_SLOT_MAX_WELL_KNOWN+1)
1827#define TLSMAP_SIZE       BIONIC_TLS_SLOTS
1828#define TLSMAP_BITS       32
1829#define TLSMAP_WORDS      ((TLSMAP_SIZE+TLSMAP_BITS-1)/TLSMAP_BITS)
1830#define TLSMAP_WORD(m,k)  (m)->map[(k)/TLSMAP_BITS]
1831#define TLSMAP_MASK(k)    (1U << ((k)&(TLSMAP_BITS-1)))
1832
1833/* this macro is used to quickly check that a key falls within the valid range */
1834#define TLSMAP_VALIDATE_KEY(key)  \
1835    ((key) >= TLSMAP_START && (key) < TLSMAP_SIZE)
1836
1837/* the type of tls key destructor functions */
1838typedef void (*tls_dtor_t)(void*);
1839
1840typedef struct {
1841    int         init;                  /* see comment in tlsmap_lock() */
1842    uint32_t    map[TLSMAP_WORDS];     /* bitmap of allocated keys */
1843    tls_dtor_t  dtors[TLSMAP_SIZE];    /* key destructors */
1844} tlsmap_t;
1845
1846static pthread_mutex_t  _tlsmap_lock = PTHREAD_MUTEX_INITIALIZER;
1847static tlsmap_t         _tlsmap;
1848
1849/* lock the global TLS map lock and return a handle to it */
1850static __inline__ tlsmap_t* tlsmap_lock(void)
1851{
1852    tlsmap_t*   m = &_tlsmap;
1853
1854    pthread_mutex_lock(&_tlsmap_lock);
1855    /* we need to initialize the first entry of the 'map' array
1856     * with the value TLS_DEFAULT_ALLOC_MAP. doing it statically
1857     * when declaring _tlsmap is a bit awkward and is going to
1858     * produce warnings, so do it the first time we use the map
1859     * instead
1860     */
1861    if (__unlikely(!m->init)) {
1862        TLSMAP_WORD(m,0) = TLS_DEFAULT_ALLOC_MAP;
1863        m->init          = 1;
1864    }
1865    return m;
1866}
1867
1868/* unlock the global TLS map */
1869static __inline__ void tlsmap_unlock(tlsmap_t*  m)
1870{
1871    pthread_mutex_unlock(&_tlsmap_lock);
1872    (void)m;  /* a good compiler is a happy compiler */
1873}
1874
1875/* test to see whether a key is allocated */
1876static __inline__ int tlsmap_test(tlsmap_t*  m, int  key)
1877{
1878    return (TLSMAP_WORD(m,key) & TLSMAP_MASK(key)) != 0;
1879}
1880
1881/* set the destructor and bit flag on a newly allocated key */
1882static __inline__ void tlsmap_set(tlsmap_t*  m, int  key, tls_dtor_t  dtor)
1883{
1884    TLSMAP_WORD(m,key) |= TLSMAP_MASK(key);
1885    m->dtors[key]       = dtor;
1886}
1887
1888/* clear the destructor and bit flag on an existing key */
1889static __inline__ void  tlsmap_clear(tlsmap_t*  m, int  key)
1890{
1891    TLSMAP_WORD(m,key) &= ~TLSMAP_MASK(key);
1892    m->dtors[key]       = NULL;
1893}
1894
1895/* allocate a new TLS key, return -1 if no room left */
1896static int tlsmap_alloc(tlsmap_t*  m, tls_dtor_t  dtor)
1897{
1898    int  key;
1899
1900    for ( key = TLSMAP_START; key < TLSMAP_SIZE; key++ ) {
1901        if ( !tlsmap_test(m, key) ) {
1902            tlsmap_set(m, key, dtor);
1903            return key;
1904        }
1905    }
1906    return -1;
1907}
1908
1909
1910int pthread_key_create(pthread_key_t *key, void (*destructor_function)(void *))
1911{
1912    uint32_t   err = ENOMEM;
1913    tlsmap_t*  map = tlsmap_lock();
1914    int        k   = tlsmap_alloc(map, destructor_function);
1915
1916    if (k >= 0) {
1917        *key = k;
1918        err  = 0;
1919    }
1920    tlsmap_unlock(map);
1921    return err;
1922}
1923
1924
1925/* This deletes a pthread_key_t. Note that the standard mandates that this does
1926 * not call the destructor of non-NULL key values. Instead, it is the
1927 * responsibility of the caller to properly dispose of the corresponding data
1928 * and resources, using any means it finds suitable.
1929 *
1930 * On the other hand, this function will clear the corresponding key data
1931 * values in all known threads. This prevents later (invalid) calls to
1932 * pthread_getspecific() from receiving invalid/stale values.
1933 */
1934int pthread_key_delete(pthread_key_t key)
1935{
1936    uint32_t             err;
1937    pthread_internal_t*  thr;
1938    tlsmap_t*            map;
1939
1940    if (!TLSMAP_VALIDATE_KEY(key)) {
1941        return EINVAL;
1942    }
1943
1944    map = tlsmap_lock();
1945
1946    if (!tlsmap_test(map, key)) {
1947        err = EINVAL;
1948        goto err1;
1949    }
1950
1951    /* clear value in all threads */
1952    pthread_mutex_lock(&gThreadListLock);
1953    for ( thr = gThreadList; thr != NULL; thr = thr->next ) {
1954        /* avoid zombie threads with a negative 'join_count'. these are really
1955         * already dead and don't have a TLS area anymore.
1956         *
1957         * similarly, it is possible to have thr->tls == NULL for threads that
1958         * were just recently created through pthread_create() but whose
1959         * startup trampoline (__thread_entry) hasn't been run yet by the
1960         * scheduler. so check for this too.
1961         */
1962        if (thr->join_count < 0 || !thr->tls)
1963            continue;
1964
1965        thr->tls[key] = NULL;
1966    }
1967    tlsmap_clear(map, key);
1968
1969    pthread_mutex_unlock(&gThreadListLock);
1970    err = 0;
1971
1972err1:
1973    tlsmap_unlock(map);
1974    return err;
1975}
1976
1977
1978int pthread_setspecific(pthread_key_t key, const void *ptr)
1979{
1980    int        err = EINVAL;
1981    tlsmap_t*  map;
1982
1983    if (TLSMAP_VALIDATE_KEY(key)) {
1984        /* check that we're trying to set data for an allocated key */
1985        map = tlsmap_lock();
1986        if (tlsmap_test(map, key)) {
1987            ((uint32_t *)__get_tls())[key] = (uint32_t)ptr;
1988            err = 0;
1989        }
1990        tlsmap_unlock(map);
1991    }
1992    return err;
1993}
1994
1995void * pthread_getspecific(pthread_key_t key)
1996{
1997    if (!TLSMAP_VALIDATE_KEY(key)) {
1998        return NULL;
1999    }
2000
2001    /* for performance reasons, we do not lock/unlock the global TLS map
2002     * to check that the key is properly allocated. if the key was not
2003     * allocated, the value read from the TLS should always be NULL
2004     * due to pthread_key_delete() clearing the values for all threads.
2005     */
2006    return (void *)(((unsigned *)__get_tls())[key]);
2007}
2008
2009/* POSIX mandates that this be defined in <limits.h> but we don't have
2010 * it just yet.
2011 */
2012#ifndef PTHREAD_DESTRUCTOR_ITERATIONS
2013#  define PTHREAD_DESTRUCTOR_ITERATIONS  4
2014#endif
2015
2016/* this function is called from pthread_exit() to remove all TLS key data
2017 * from this thread's TLS area. this must call the destructor of all keys
2018 * that have a non-NULL data value (and a non-NULL destructor).
2019 *
2020 * because destructors can do funky things like deleting/creating other
2021 * keys, we need to implement this in a loop
2022 */
2023static void pthread_key_clean_all(void)
2024{
2025    tlsmap_t*    map;
2026    void**       tls = (void**)__get_tls();
2027    int          rounds = PTHREAD_DESTRUCTOR_ITERATIONS;
2028
2029    map = tlsmap_lock();
2030
2031    for (rounds = PTHREAD_DESTRUCTOR_ITERATIONS; rounds > 0; rounds--)
2032    {
2033        int  kk, count = 0;
2034
2035        for (kk = TLSMAP_START; kk < TLSMAP_SIZE; kk++) {
2036            if ( tlsmap_test(map, kk) )
2037            {
2038                void*       data = tls[kk];
2039                tls_dtor_t  dtor = map->dtors[kk];
2040
2041                if (data != NULL && dtor != NULL)
2042                {
2043                   /* we need to clear the key data now; this will prevent the
2044                    * destructor (or a later one) from seeing the old value if
2045                    * it calls pthread_getspecific() for some odd reason
2046                    *
2047                    * we do not do this if 'dtor == NULL' just in case another
2048                    * destructor function might be responsible for manually
2049                    * releasing the corresponding data.
2050                    */
2051                    tls[kk] = NULL;
2052
2053                   /* because the destructor is free to call pthread_key_create
2054                    * and/or pthread_key_delete, we need to temporarily unlock
2055                    * the TLS map
2056                    */
2057                    tlsmap_unlock(map);
2058                    (*dtor)(data);
2059                    map = tlsmap_lock();
2060
2061                    count += 1;
2062                }
2063            }
2064        }
2065
2066        /* if we didn't call any destructor, there is no need to check the
2067         * TLS data again
2068         */
2069        if (count == 0)
2070            break;
2071    }
2072    tlsmap_unlock(map);
2073}
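
/* Illustrative sketch (hypothetical, not part of this library): a destructor
 * that touches other keys, which is why the function above makes up to
 * PTHREAD_DESTRUCTOR_ITERATIONS passes instead of a single one. Assume both
 * keys were created with pthread_key_create().
 *
 *   #include <pthread.h>
 *   #include <stdlib.h>
 *
 *   static pthread_key_t  log_key, scratch_key;
 *
 *   static void scratch_dtor(void* p) { free(p); }
 *
 *   static void log_dtor(void* p) {
 *       // Allocating scratch space here re-populates 'scratch_key' even if it
 *       // was already cleaned, so another round of destructor calls is needed.
 *       char* scratch = malloc(64);
 *       pthread_setspecific(scratch_key, scratch);
 *       // ... flush the log record pointed to by 'p' ...
 *       free(p);
 *   }
 */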
2074
2075// man says this should be in <linux/unistd.h>, but it isn't
2076extern int tgkill(int tgid, int tid, int sig);
2077
2078int pthread_kill(pthread_t tid, int sig)
2079{
2080    int  ret;
2081    int  old_errno = errno;
2082    pthread_internal_t * thread = (pthread_internal_t *)tid;
2083
2084    ret = tgkill(getpid(), thread->kernel_id, sig);
2085    if (ret < 0) {
2086        ret = errno;
2087        errno = old_errno;
2088    }
2089
2090    return ret;
2091}
2092
2093/* Despite the fact that our kernel headers define sigset_t explicitly
2094 * as a 32-bit integer, the kernel system call really expects a 64-bit
2095 * bitmap for the signal set, or more exactly an array of two 32-bit
2096 * values (see $KERNEL/arch/$ARCH/include/asm/signal.h for details).
2097 *
2098 * Unfortunately, we cannot fix the sigset_t definition without breaking
2099 * the C library ABI, so perform a little runtime translation here.
2100 */
2101typedef union {
2102    sigset_t   bionic;
2103    uint32_t   kernel[2];
2104} kernel_sigset_t;
2105
2106/* this is a private syscall stub */
2107extern int __rt_sigprocmask(int, const kernel_sigset_t *, kernel_sigset_t *, size_t);
2108
2109int pthread_sigmask(int how, const sigset_t *set, sigset_t *oset)
2110{
2111    /* pthread_sigmask must return the error code, but the syscall
2112     * will set errno instead and return 0/-1
2113     */
2114    int ret, old_errno = errno;
2115
2116    /* We must convert *set into a kernel_sigset_t */
2117    kernel_sigset_t  in_set, *in_set_ptr;
2118    kernel_sigset_t  out_set;
2119
2120    in_set.kernel[0] = in_set.kernel[1] = 0;
2121    out_set.kernel[0] = out_set.kernel[1] = 0;
2122
2123    /* 'in_set_ptr' is the second parameter to __rt_sigprocmask. It must be NULL
2124     * if 'set' is NULL to ensure correct semantics (which in this case would
2125     * be to ignore 'how' and return the current signal set into 'oset').
2126     */
2127    if (set == NULL) {
2128        in_set_ptr = NULL;
2129    } else {
2130        in_set.bionic = *set;
2131        in_set_ptr = &in_set;
2132    }
2133
2134    ret = __rt_sigprocmask(how, in_set_ptr, &out_set, sizeof(kernel_sigset_t));
2135    if (ret < 0)
2136        ret = errno;
2137
2138    if (oset)
2139        *oset = out_set.bionic;
2140
2141    errno = old_errno;
2142    return ret;
2143}
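
/* Usage sketch (illustrative only, not part of this library): blocking SIGUSR1
 * in the calling thread. Unlike sigprocmask(), pthread_sigmask() returns an
 * error code directly and leaves errno untouched. The function is hypothetical.
 *
 *   #include <pthread.h>
 *   #include <signal.h>
 *
 *   int block_sigusr1(sigset_t* old_mask) {
 *       sigset_t mask;
 *       sigemptyset(&mask);
 *       sigaddset(&mask, SIGUSR1);
 *       return pthread_sigmask(SIG_BLOCK, &mask, old_mask);
 *   }
 */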
2144
2145
2146int pthread_getcpuclockid(pthread_t  tid, clockid_t  *clockid)
2147{
2148    const int            CLOCK_IDTYPE_BITS = 3;
2149    pthread_internal_t*  thread = (pthread_internal_t*)tid;
2150
2151    if (!thread)
2152        return ESRCH;
2153
2154    *clockid = CLOCK_THREAD_CPUTIME_ID | (thread->kernel_id << CLOCK_IDTYPE_BITS);
2155    return 0;
2156}
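
/* Usage sketch (illustrative only, not part of this library): reading another
 * thread's CPU time through the clockid returned above. The helper below is
 * hypothetical.
 *
 *   #include <errno.h>
 *   #include <pthread.h>
 *   #include <time.h>
 *
 *   int thread_cpu_time(pthread_t tid, struct timespec* out) {
 *       clockid_t clk;
 *       int err = pthread_getcpuclockid(tid, &clk);
 *       if (err != 0)
 *           return err;
 *       return clock_gettime(clk, out) ? errno : 0;
 *   }
 */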
2157
2158
2159/* NOTE: this implementation doesn't support an init function that throws a C++ exception
2160 *       or calls fork()
2161 */
2162int  pthread_once( pthread_once_t*  once_control,  void (*init_routine)(void) )
2163{
2164    static pthread_mutex_t   once_lock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER;
2165    volatile pthread_once_t* ocptr = once_control;
2166    pthread_once_t value;
2167
2168    /* PTHREAD_ONCE_INIT is 0; we use the following bit flags:
2169     *
2170     *   bit 0 set  -> initialization is under way
2171     *   bit 1 set  -> initialization is complete
2172     */
2173#define ONCE_INITIALIZING           (1 << 0)
2174#define ONCE_COMPLETED              (1 << 1)
2175
2176    /* First check if the once is already initialized. This will be the common
2177     * case and we want to make this as fast as possible. Note that this still
2178     * requires a load_acquire operation here to ensure that all the
2179     * stores performed by the initialization function are observable on
2180     * this CPU after we exit.
2181     */
2182    if (__likely((*ocptr & ONCE_COMPLETED) != 0)) {
2183        ANDROID_MEMBAR_FULL();
2184        return 0;
2185    }
2186
2187    for (;;) {
2188        /* Try to atomically set the INITIALIZING flag.
2189         * This requires a cmpxchg loop, and we may need
2190         * to exit prematurely if we detect that
2191         * COMPLETED is now set.
2192         */
2193        int32_t  oldval, newval;
2194
2195        do {
2196            oldval = *ocptr;
2197            if ((oldval & ONCE_COMPLETED) != 0)
2198                break;
2199
2200            newval = oldval | ONCE_INITIALIZING;
2201        } while (__bionic_cmpxchg(oldval, newval, ocptr) != 0);
2202
2203        if ((oldval & ONCE_COMPLETED) != 0) {
2204            /* We detected that COMPLETED was set while in our loop */
2205            ANDROID_MEMBAR_FULL();
2206            return 0;
2207        }
2208
2209        if ((oldval & ONCE_INITIALIZING) == 0) {
2210            /* We got there first, we can jump out of the loop to
2211             * handle the initialization */
2212            break;
2213        }
2214
2215        /* Another thread is running the initialization and hasn't completed
2216         * yet, so wait for it, then try again. */
2217        __futex_wait_ex(ocptr, 0, oldval, NULL);
2218    }
2219
2220    /* call the initialization function. */
2221    (*init_routine)();
2222
2223    /* Do a store_release indicating that initialization is complete */
2224    ANDROID_MEMBAR_FULL();
2225    *ocptr = ONCE_COMPLETED;
2226
2227    /* Wake up any waiters, if any */
2228    __futex_wake_ex(ocptr, 0, INT_MAX);
2229
2230    return 0;
2231}
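
/* Usage sketch (illustrative only, not part of this library): lazy, one-time
 * initialization of shared state with pthread_once(). All names are
 * hypothetical.
 *
 *   #include <pthread.h>
 *
 *   static pthread_once_t  g_once = PTHREAD_ONCE_INIT;
 *   static int             g_table[256];
 *
 *   static void build_table(void) {
 *       int i;
 *       for (i = 0; i < 256; i++)
 *           g_table[i] = i * i;
 *   }
 *
 *   int lookup(int i) {
 *       pthread_once(&g_once, build_table);   // build_table runs exactly once
 *       return g_table[i];
 *   }
 */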
2232
2233/* This value is not exported by kernel headers, so hardcode it here */
2234#define MAX_TASK_COMM_LEN	16
2235#define TASK_COMM_FMT 		"/proc/self/task/%u/comm"
2236
2237int pthread_setname_np(pthread_t thid, const char *thname)
2238{
2239    size_t thname_len;
2240    int saved_errno, ret;
2241
2242    if (thid == 0 || thname == NULL)
2243        return EINVAL;
2244
2245    thname_len = strlen(thname);
2246    if (thname_len >= MAX_TASK_COMM_LEN)
2247        return ERANGE;
2248
2249    saved_errno = errno;
2250    if (thid == pthread_self())
2251    {
2252        ret = prctl(PR_SET_NAME, (unsigned long)thname, 0, 0, 0) ? errno : 0;
2253    }
2254    else
2255    {
2256        /* Have to change another thread's name */
2257        pthread_internal_t *thread = (pthread_internal_t *)thid;
2258        char comm_name[sizeof(TASK_COMM_FMT) + 8];
2259        ssize_t n;
2260        int fd;
2261
2262        snprintf(comm_name, sizeof(comm_name), TASK_COMM_FMT, (unsigned int)thread->kernel_id);
2263        fd = open(comm_name, O_RDWR);
2264        if (fd == -1)
2265        {
2266            ret = errno;
2267            goto exit;
2268        }
2269        n = TEMP_FAILURE_RETRY(write(fd, thname, thname_len));
2270        close(fd);
2271
2272        if (n < 0)
2273            ret = errno;
2274        else if ((size_t)n != thname_len)
2275            ret = EIO;
2276        else
2277            ret = 0;
2278    }
2279exit:
2280    errno = saved_errno;
2281    return ret;
2282}
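
/* Usage sketch (illustrative only, not part of this library): naming the
 * current thread. The name must fit in the kernel's 16-byte comm field
 * (15 characters plus the terminating NUL), otherwise ERANGE is returned.
 *
 *   #include <pthread.h>
 *
 *   void name_me(void) {
 *       pthread_setname_np(pthread_self(), "worker-0");   // <= 15 chars
 *   }
 */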
2283
2284/* Return the kernel thread ID for a pthread.
2285 * This is only defined for implementations where pthread <-> kernel is 1:1, which this is.
2286 * Not the same as pthread_getthreadid_np, which is commonly defined to be opaque.
2287 * Internal, not an NDK API.
2288 */
2289
2290pid_t __pthread_gettid(pthread_t thid)
2291{
2292    pthread_internal_t* thread = (pthread_internal_t*)thid;
2293    return thread->kernel_id;
2294}
2295