1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "thread_list.h"
18
19#include <backtrace/BacktraceMap.h>
20#include <dirent.h>
21#include <ScopedLocalRef.h>
22#include <ScopedUtfChars.h>
23#include <sys/types.h>
24#include <unistd.h>
25
26#include <sstream>
27
28#include "base/histogram-inl.h"
29#include "base/mutex-inl.h"
30#include "base/systrace.h"
31#include "base/time_utils.h"
32#include "base/timing_logger.h"
33#include "debugger.h"
34#include "gc/collector/concurrent_copying.h"
35#include "jni_internal.h"
36#include "lock_word.h"
37#include "monitor.h"
38#include "scoped_thread_state_change.h"
39#include "thread.h"
40#include "trace.h"
41#include "well_known_classes.h"
42
43#if ART_USE_FUTEXES
44#include "linux/futex.h"
45#include "sys/syscall.h"
46#ifndef SYS_futex
47#define SYS_futex __NR_futex
48#endif
49#endif  // ART_USE_FUTEXES
50
51namespace art {
52
53static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5);
54static constexpr uint64_t kThreadSuspendTimeoutMs = 30 * 1000;  // 30s.
55// Use 0 since we want to yield to prevent blocking for an unpredictable amount of time.
56static constexpr useconds_t kThreadSuspendInitialSleepUs = 0;
57static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000;
58static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000;
59
60// Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for
61// some history.
62// Turned off again. b/29248079
63static constexpr bool kDumpUnattachedThreadNativeStack = false;
64
65ThreadList::ThreadList()
66    : suspend_all_count_(0),
67      debug_suspend_all_count_(0),
68      unregistering_count_(0),
69      suspend_all_historam_("suspend all histogram", 16, 64),
70      long_suspend_(false) {
71  CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
72}
73
74ThreadList::~ThreadList() {
75  ScopedTrace trace(__PRETTY_FUNCTION__);
76  // Detach the current thread if necessary. If we failed to start, there might not be any threads.
77  // We need to detach the current thread here in case there's another thread waiting to join with
78  // us.
79  bool contains = false;
80  Thread* self = Thread::Current();
81  {
82    MutexLock mu(self, *Locks::thread_list_lock_);
83    contains = Contains(self);
84  }
85  if (contains) {
86    Runtime::Current()->DetachCurrentThread();
87  }
88  WaitForOtherNonDaemonThreadsToExit();
89  // Disable GC and wait for GC to complete in case there are still daemon threads doing
90  // allocations.
91  gc::Heap* const heap = Runtime::Current()->GetHeap();
92  heap->DisableGCForShutdown();
93  // In case a GC is in progress, wait for it to finish.
94  heap->WaitForGcToComplete(gc::kGcCauseBackground, Thread::Current());
95  // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
96  //       Thread::Init.
97  SuspendAllDaemonThreadsForShutdown();
98}
99
100bool ThreadList::Contains(Thread* thread) {
101  return find(list_.begin(), list_.end(), thread) != list_.end();
102}
103
104bool ThreadList::Contains(pid_t tid) {
105  for (const auto& thread : list_) {
106    if (thread->GetTid() == tid) {
107      return true;
108    }
109  }
110  return false;
111}
112
113pid_t ThreadList::GetLockOwner() {
114  return Locks::thread_list_lock_->GetExclusiveOwnerTid();
115}
116
117void ThreadList::DumpNativeStacks(std::ostream& os) {
118  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
119  std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid()));
120  for (const auto& thread : list_) {
121    os << "DUMPING THREAD " << thread->GetTid() << "\n";
122    DumpNativeStack(os, thread->GetTid(), map.get(), "\t");
123    os << "\n";
124  }
125}
126
127void ThreadList::DumpForSigQuit(std::ostream& os) {
128  {
129    ScopedObjectAccess soa(Thread::Current());
130    // Only print if we have samples.
131    if (suspend_all_historam_.SampleSize() > 0) {
132      Histogram<uint64_t>::CumulativeData data;
133      suspend_all_historam_.CreateHistogram(&data);
134      suspend_all_historam_.PrintConfidenceIntervals(os, 0.99, data);  // Dump time to suspend.
135    }
136  }
137  bool dump_native_stack = Runtime::Current()->GetDumpNativeStackOnSigQuit();
138  Dump(os, dump_native_stack);
139  DumpUnattachedThreads(os, dump_native_stack);
140}
141
142static void DumpUnattachedThread(std::ostream& os, pid_t tid, bool dump_native_stack)
143    NO_THREAD_SAFETY_ANALYSIS {
144  // TODO: No thread safety analysis as DumpState with a null thread won't access fields, should
145  // refactor DumpState to avoid skipping analysis.
146  Thread::DumpState(os, nullptr, tid);
147  DumpKernelStack(os, tid, "  kernel: ", false);
148  if (dump_native_stack && kDumpUnattachedThreadNativeStack) {
149    DumpNativeStack(os, tid, nullptr, "  native: ");
150  }
151  os << "\n";
152}
153
154void ThreadList::DumpUnattachedThreads(std::ostream& os, bool dump_native_stack) {
155  DIR* d = opendir("/proc/self/task");
156  if (!d) {
157    return;
158  }
159
160  Thread* self = Thread::Current();
161  dirent* e;
162  while ((e = readdir(d)) != nullptr) {
163    char* end;
164    pid_t tid = strtol(e->d_name, &end, 10);
165    if (!*end) {
166      bool contains;
167      {
168        MutexLock mu(self, *Locks::thread_list_lock_);
169        contains = Contains(tid);
170      }
171      if (!contains) {
172        DumpUnattachedThread(os, tid, dump_native_stack);
173      }
174    }
175  }
176  closedir(d);
177}
178
179// Dump checkpoint timeout in milliseconds. Larger amount on the target, since the device could be
180// overloaded with ANR dumps.
181static constexpr uint32_t kDumpWaitTimeout = kIsTargetBuild ? 100000 : 20000;
182
183// A closure used by Thread::Dump.
184class DumpCheckpoint FINAL : public Closure {
185 public:
186  DumpCheckpoint(std::ostream* os, bool dump_native_stack)
187      : os_(os),
188        barrier_(0),
189        backtrace_map_(dump_native_stack ? BacktraceMap::Create(getpid()) : nullptr),
190        dump_native_stack_(dump_native_stack) {}
191
192  void Run(Thread* thread) OVERRIDE {
193    // Note thread and self may not be equal if thread was already suspended at the point of the
194    // request.
195    Thread* self = Thread::Current();
196    std::ostringstream local_os;
197    {
198      ScopedObjectAccess soa(self);
199      thread->Dump(local_os, dump_native_stack_, backtrace_map_.get());
200    }
201    local_os << "\n";
202    {
203      // Use the logging lock to ensure serialization when writing to the common ostream.
204      MutexLock mu(self, *Locks::logging_lock_);
205      *os_ << local_os.str();
206    }
207    barrier_.Pass(self);
208  }
209
210  void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
211    Thread* self = Thread::Current();
212    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
213    bool timed_out = barrier_.Increment(self, threads_running_checkpoint, kDumpWaitTimeout);
214    if (timed_out) {
215      // Avoid a recursive abort.
216      LOG((kIsDebugBuild && (gAborting == 0)) ? FATAL : ERROR)
217          << "Unexpected time out during dump checkpoint.";
218    }
219  }
220
221 private:
222  // The common stream that will accumulate all the dumps.
223  std::ostream* const os_;
224  // The barrier to be passed through and for the requestor to wait upon.
225  Barrier barrier_;
226  // A backtrace map, so that all threads use a shared info and don't reacquire/parse separately.
227  std::unique_ptr<BacktraceMap> backtrace_map_;
228  // Whether we should dump the native stack.
229  const bool dump_native_stack_;
230};
231
232void ThreadList::Dump(std::ostream& os, bool dump_native_stack) {
233  {
234    MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
235    os << "DALVIK THREADS (" << list_.size() << "):\n";
236  }
237  DumpCheckpoint checkpoint(&os, dump_native_stack);
238  size_t threads_running_checkpoint;
239  {
240    // Use SOA to prevent deadlocks if multiple threads are calling Dump() at the same time.
241    ScopedObjectAccess soa(Thread::Current());
242    threads_running_checkpoint = RunCheckpoint(&checkpoint);
243  }
244  if (threads_running_checkpoint != 0) {
245    checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
246  }
247}
248
249void ThreadList::AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread* ignore2) {
250  MutexLock mu(self, *Locks::thread_list_lock_);
251  MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
252  for (const auto& thread : list_) {
253    if (thread != ignore1 && thread != ignore2) {
254      CHECK(thread->IsSuspended())
255            << "\nUnsuspended thread: <<" << *thread << "\n"
256            << "self: <<" << *Thread::Current();
257    }
258  }
259}
260
#if HAVE_TIMED_RWLOCK
// Called when suspending all threads timed out: gathers the mutator lock state and a thread list
// dump into one message and logs it as FATAL. Attempts to rectify locks so that the thread list
// can be dumped with the required locks before exiting.
NO_RETURN static void UnsafeLogFatalForThreadSuspendAllTimeout() {
  Runtime* const current_runtime = Runtime::Current();
  std::ostringstream report;
  report << "Thread suspend timeout\n";
  Locks::mutator_lock_->Dump(report);
  report << "\n";
  current_runtime->GetThreadList()->Dump(report);
  LOG(FATAL) << report.str();
  exit(0);
}
#endif
274
// Suspending all threads can simply wait on the mutator_lock_, but suspending one particular
// thread has to poll. This is the wait between two polls: |delay_us| is the requested sleep in
// microseconds, and a value of 0 means sched_yield is used in place of usleep.
static void ThreadSuspendSleep(useconds_t delay_us) {
  if (delay_us != 0) {
    usleep(delay_us);
    return;
  }
  sched_yield();
}
285
286size_t ThreadList::RunCheckpoint(Closure* checkpoint_function) {
287  Thread* self = Thread::Current();
288  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
289  Locks::thread_list_lock_->AssertNotHeld(self);
290  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
291
292  std::vector<Thread*> suspended_count_modified_threads;
293  size_t count = 0;
294  {
295    // Call a checkpoint function for each thread, threads which are suspend get their checkpoint
296    // manually called.
297    MutexLock mu(self, *Locks::thread_list_lock_);
298    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
299    count = list_.size();
300    for (const auto& thread : list_) {
301      if (thread != self) {
302        while (true) {
303          if (thread->RequestCheckpoint(checkpoint_function)) {
304            // This thread will run its checkpoint some time in the near future.
305            break;
306          } else {
307            // We are probably suspended, try to make sure that we stay suspended.
308            // The thread switched back to runnable.
309            if (thread->GetState() == kRunnable) {
310              // Spurious fail, try again.
311              continue;
312            }
313            thread->ModifySuspendCount(self, +1, nullptr, false);
314            suspended_count_modified_threads.push_back(thread);
315            break;
316          }
317        }
318      }
319    }
320  }
321
322  // Run the checkpoint on ourself while we wait for threads to suspend.
323  checkpoint_function->Run(self);
324
325  // Run the checkpoint on the suspended threads.
326  for (const auto& thread : suspended_count_modified_threads) {
327    if (!thread->IsSuspended()) {
328      if (ATRACE_ENABLED()) {
329        std::ostringstream oss;
330        thread->ShortDump(oss);
331        ATRACE_BEGIN((std::string("Waiting for suspension of thread ") + oss.str()).c_str());
332      }
333      // Busy wait until the thread is suspended.
334      const uint64_t start_time = NanoTime();
335      do {
336        ThreadSuspendSleep(kThreadSuspendInitialSleepUs);
337      } while (!thread->IsSuspended());
338      const uint64_t total_delay = NanoTime() - start_time;
339      // Shouldn't need to wait for longer than 1000 microseconds.
340      constexpr uint64_t kLongWaitThreshold = MsToNs(1);
341      ATRACE_END();
342      if (UNLIKELY(total_delay > kLongWaitThreshold)) {
343        LOG(WARNING) << "Long wait of " << PrettyDuration(total_delay) << " for "
344            << *thread << " suspension!";
345      }
346    }
347    // We know for sure that the thread is suspended at this point.
348    checkpoint_function->Run(thread);
349    {
350      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
351      thread->ModifySuspendCount(self, -1, nullptr, false);
352    }
353  }
354
355  {
356    // Imitate ResumeAll, threads may be waiting on Thread::resume_cond_ since we raised their
357    // suspend count. Now the suspend_count_ is lowered so we must do the broadcast.
358    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
359    Thread::resume_cond_->Broadcast(self);
360  }
361
362  return count;
363}
364
365// Request that a checkpoint function be run on all active (non-suspended)
366// threads.  Returns the number of successful requests.
367size_t ThreadList::RunCheckpointOnRunnableThreads(Closure* checkpoint_function) {
368  Thread* self = Thread::Current();
369  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
370  Locks::thread_list_lock_->AssertNotHeld(self);
371  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
372  CHECK_NE(self->GetState(), kRunnable);
373
374  size_t count = 0;
375  {
376    // Call a checkpoint function for each non-suspended thread.
377    MutexLock mu(self, *Locks::thread_list_lock_);
378    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
379    for (const auto& thread : list_) {
380      if (thread != self) {
381        if (thread->RequestCheckpoint(checkpoint_function)) {
382          // This thread will run its checkpoint some time in the near future.
383          count++;
384        }
385      }
386    }
387  }
388
389  // Return the number of threads that will run the checkpoint function.
390  return count;
391}
392
393// A checkpoint/suspend-all hybrid to switch thread roots from
394// from-space to to-space refs. Used to synchronize threads at a point
395// to mark the initiation of marking while maintaining the to-space
396// invariant.
397size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
398                                   Closure* flip_callback,
399                                   gc::collector::GarbageCollector* collector) {
400  TimingLogger::ScopedTiming split("ThreadListFlip", collector->GetTimings());
401  const uint64_t start_time = NanoTime();
402  Thread* self = Thread::Current();
403  Locks::mutator_lock_->AssertNotHeld(self);
404  Locks::thread_list_lock_->AssertNotHeld(self);
405  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
406  CHECK_NE(self->GetState(), kRunnable);
407
408  SuspendAllInternal(self, self, nullptr);
409
410  // Run the flip callback for the collector.
411  Locks::mutator_lock_->ExclusiveLock(self);
412  flip_callback->Run(self);
413  Locks::mutator_lock_->ExclusiveUnlock(self);
414  collector->RegisterPause(NanoTime() - start_time);
415
416  // Resume runnable threads.
417  std::vector<Thread*> runnable_threads;
418  std::vector<Thread*> other_threads;
419  {
420    MutexLock mu(self, *Locks::thread_list_lock_);
421    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
422    --suspend_all_count_;
423    for (const auto& thread : list_) {
424      if (thread == self) {
425        continue;
426      }
427      // Set the flip function for both runnable and suspended threads
428      // because Thread::DumpState/DumpJavaStack() (invoked by a
429      // checkpoint) may cause the flip function to be run for a
430      // runnable/suspended thread before a runnable threads runs it
431      // for itself or we run it for a suspended thread below.
432      thread->SetFlipFunction(thread_flip_visitor);
433      if (thread->IsSuspendedAtSuspendCheck()) {
434        // The thread will resume right after the broadcast.
435        thread->ModifySuspendCount(self, -1, nullptr, false);
436        runnable_threads.push_back(thread);
437      } else {
438        other_threads.push_back(thread);
439      }
440    }
441    Thread::resume_cond_->Broadcast(self);
442  }
443
444  // Run the closure on the other threads and let them resume.
445  {
446    ReaderMutexLock mu(self, *Locks::mutator_lock_);
447    for (const auto& thread : other_threads) {
448      Closure* flip_func = thread->GetFlipFunction();
449      if (flip_func != nullptr) {
450        flip_func->Run(thread);
451      }
452    }
453    // Run it for self.
454    thread_flip_visitor->Run(self);
455  }
456
457  // Resume other threads.
458  {
459    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
460    for (const auto& thread : other_threads) {
461      thread->ModifySuspendCount(self, -1, nullptr, false);
462    }
463    Thread::resume_cond_->Broadcast(self);
464  }
465
466  return runnable_threads.size() + other_threads.size() + 1;  // +1 for self.
467}
468
469void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
470  Thread* self = Thread::Current();
471
472  if (self != nullptr) {
473    VLOG(threads) << *self << " SuspendAll for " << cause << " starting...";
474  } else {
475    VLOG(threads) << "Thread[null] SuspendAll for " << cause << " starting...";
476  }
477  {
478    ScopedTrace trace("Suspending mutator threads");
479    const uint64_t start_time = NanoTime();
480
481    SuspendAllInternal(self, self);
482    // All threads are known to have suspended (but a thread may still own the mutator lock)
483    // Make sure this thread grabs exclusive access to the mutator lock and its protected data.
484#if HAVE_TIMED_RWLOCK
485    while (true) {
486      if (Locks::mutator_lock_->ExclusiveLockWithTimeout(self, kThreadSuspendTimeoutMs, 0)) {
487        break;
488      } else if (!long_suspend_) {
489        // Reading long_suspend without the mutator lock is slightly racy, in some rare cases, this
490        // could result in a thread suspend timeout.
491        // Timeout if we wait more than kThreadSuspendTimeoutMs seconds.
492        UnsafeLogFatalForThreadSuspendAllTimeout();
493      }
494    }
495#else
496    Locks::mutator_lock_->ExclusiveLock(self);
497#endif
498
499    long_suspend_ = long_suspend;
500
501    const uint64_t end_time = NanoTime();
502    const uint64_t suspend_time = end_time - start_time;
503    suspend_all_historam_.AdjustAndAddValue(suspend_time);
504    if (suspend_time > kLongThreadSuspendThreshold) {
505      LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(suspend_time);
506    }
507
508    if (kDebugLocking) {
509      // Debug check that all threads are suspended.
510      AssertThreadsAreSuspended(self, self);
511    }
512  }
513  ATRACE_BEGIN((std::string("Mutator threads suspended for ") + cause).c_str());
514
515  if (self != nullptr) {
516    VLOG(threads) << *self << " SuspendAll complete";
517  } else {
518    VLOG(threads) << "Thread[null] SuspendAll complete";
519  }
520}
521
522// Ensures all threads running Java suspend and that those not running Java don't start.
523// Debugger thread might be set to kRunnable for a short period of time after the
524// SuspendAllInternal. This is safe because it will be set back to suspended state before
525// the SuspendAll returns.
526void ThreadList::SuspendAllInternal(Thread* self,
527                                    Thread* ignore1,
528                                    Thread* ignore2,
529                                    bool debug_suspend) {
530  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
531  Locks::thread_list_lock_->AssertNotHeld(self);
532  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
533  if (kDebugLocking && self != nullptr) {
534    CHECK_NE(self->GetState(), kRunnable);
535  }
536
537  // First request that all threads suspend, then wait for them to suspend before
538  // returning. This suspension scheme also relies on other behaviour:
539  // 1. Threads cannot be deleted while they are suspended or have a suspend-
540  //    request flag set - (see Unregister() below).
541  // 2. When threads are created, they are created in a suspended state (actually
542  //    kNative) and will never begin executing Java code without first checking
543  //    the suspend-request flag.
544
545  // The atomic counter for number of threads that need to pass the barrier.
546  AtomicInteger pending_threads;
547  uint32_t num_ignored = 0;
548  if (ignore1 != nullptr) {
549    ++num_ignored;
550  }
551  if (ignore2 != nullptr && ignore1 != ignore2) {
552    ++num_ignored;
553  }
554  {
555    MutexLock mu(self, *Locks::thread_list_lock_);
556    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
557    // Update global suspend all state for attaching threads.
558    ++suspend_all_count_;
559    if (debug_suspend)
560      ++debug_suspend_all_count_;
561    pending_threads.StoreRelaxed(list_.size() - num_ignored);
562    // Increment everybody's suspend count (except those that should be ignored).
563    for (const auto& thread : list_) {
564      if (thread == ignore1 || thread == ignore2) {
565        continue;
566      }
567      VLOG(threads) << "requesting thread suspend: " << *thread;
568      while (true) {
569        if (LIKELY(thread->ModifySuspendCount(self, +1, &pending_threads, debug_suspend))) {
570          break;
571        } else {
572          // Failure means the list of active_suspend_barriers is full, we should release the
573          // thread_suspend_count_lock_ (to avoid deadlock) and wait till the target thread has
574          // executed Thread::PassActiveSuspendBarriers(). Note that we could not simply wait for
575          // the thread to change to a suspended state, because it might need to run checkpoint
576          // function before the state change, which also needs thread_suspend_count_lock_.
577
578          // This is very unlikely to happen since more than kMaxSuspendBarriers threads need to
579          // execute SuspendAllInternal() simultaneously, and target thread stays in kRunnable
580          // in the mean time.
581          Locks::thread_suspend_count_lock_->ExclusiveUnlock(self);
582          NanoSleep(100000);
583          Locks::thread_suspend_count_lock_->ExclusiveLock(self);
584        }
585      }
586
587      // Must install the pending_threads counter first, then check thread->IsSuspend() and clear
588      // the counter. Otherwise there's a race with Thread::TransitionFromRunnableToSuspended()
589      // that can lead a thread to miss a call to PassActiveSuspendBarriers().
590      if (thread->IsSuspended()) {
591        // Only clear the counter for the current thread.
592        thread->ClearSuspendBarrier(&pending_threads);
593        pending_threads.FetchAndSubSequentiallyConsistent(1);
594      }
595    }
596  }
597
598  // Wait for the barrier to be passed by all runnable threads. This wait
599  // is done with a timeout so that we can detect problems.
600#if ART_USE_FUTEXES
601  timespec wait_timeout;
602  InitTimeSpec(true, CLOCK_MONOTONIC, 10000, 0, &wait_timeout);
603#endif
604  while (true) {
605    int32_t cur_val = pending_threads.LoadRelaxed();
606    if (LIKELY(cur_val > 0)) {
607#if ART_USE_FUTEXES
608      if (futex(pending_threads.Address(), FUTEX_WAIT, cur_val, &wait_timeout, nullptr, 0) != 0) {
609        // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
610        if ((errno != EAGAIN) && (errno != EINTR)) {
611          if (errno == ETIMEDOUT) {
612            LOG(kIsDebugBuild ? FATAL : ERROR) << "Unexpected time out during suspend all.";
613          } else {
614            PLOG(FATAL) << "futex wait failed for SuspendAllInternal()";
615          }
616        }
617      } else {
618        cur_val = pending_threads.LoadRelaxed();
619        CHECK_EQ(cur_val, 0);
620        break;
621      }
622#else
623      // Spin wait. This is likely to be slow, but on most architecture ART_USE_FUTEXES is set.
624#endif
625    } else {
626      CHECK_EQ(cur_val, 0);
627      break;
628    }
629  }
630}
631
632void ThreadList::ResumeAll() {
633  Thread* self = Thread::Current();
634
635  if (self != nullptr) {
636    VLOG(threads) << *self << " ResumeAll starting";
637  } else {
638    VLOG(threads) << "Thread[null] ResumeAll starting";
639  }
640
641  ATRACE_END();
642
643  ScopedTrace trace("Resuming mutator threads");
644
645  if (kDebugLocking) {
646    // Debug check that all threads are suspended.
647    AssertThreadsAreSuspended(self, self);
648  }
649
650  long_suspend_ = false;
651
652  Locks::mutator_lock_->ExclusiveUnlock(self);
653  {
654    MutexLock mu(self, *Locks::thread_list_lock_);
655    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
656    // Update global suspend all state for attaching threads.
657    --suspend_all_count_;
658    // Decrement the suspend counts for all threads.
659    for (const auto& thread : list_) {
660      if (thread == self) {
661        continue;
662      }
663      thread->ModifySuspendCount(self, -1, nullptr, false);
664    }
665
666    // Broadcast a notification to all suspended threads, some or all of
667    // which may choose to wake up.  No need to wait for them.
668    if (self != nullptr) {
669      VLOG(threads) << *self << " ResumeAll waking others";
670    } else {
671      VLOG(threads) << "Thread[null] ResumeAll waking others";
672    }
673    Thread::resume_cond_->Broadcast(self);
674  }
675
676  if (self != nullptr) {
677    VLOG(threads) << *self << " ResumeAll complete";
678  } else {
679    VLOG(threads) << "Thread[null] ResumeAll complete";
680  }
681}
682
683void ThreadList::Resume(Thread* thread, bool for_debugger) {
684  // This assumes there was an ATRACE_BEGIN when we suspended the thread.
685  ATRACE_END();
686
687  Thread* self = Thread::Current();
688  DCHECK_NE(thread, self);
689  VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") starting..."
690      << (for_debugger ? " (debugger)" : "");
691
692  {
693    // To check Contains.
694    MutexLock mu(self, *Locks::thread_list_lock_);
695    // To check IsSuspended.
696    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
697    DCHECK(thread->IsSuspended());
698    if (!Contains(thread)) {
699      // We only expect threads within the thread-list to have been suspended otherwise we can't
700      // stop such threads from delete-ing themselves.
701      LOG(ERROR) << "Resume(" << reinterpret_cast<void*>(thread)
702          << ") thread not within thread list";
703      return;
704    }
705    thread->ModifySuspendCount(self, -1, nullptr, for_debugger);
706  }
707
708  {
709    VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") waking others";
710    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
711    Thread::resume_cond_->Broadcast(self);
712  }
713
714  VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") complete";
715}
716
717static void ThreadSuspendByPeerWarning(Thread* self,
718                                       LogSeverity severity,
719                                       const char* message,
720                                       jobject peer) {
721  JNIEnvExt* env = self->GetJniEnv();
722  ScopedLocalRef<jstring>
723      scoped_name_string(env, static_cast<jstring>(env->GetObjectField(
724          peer, WellKnownClasses::java_lang_Thread_name)));
725  ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
726  if (scoped_name_chars.c_str() == nullptr) {
727      LOG(severity) << message << ": " << peer;
728      env->ExceptionClear();
729  } else {
730      LOG(severity) << message << ": " << peer << ":" << scoped_name_chars.c_str();
731  }
732}
733
734Thread* ThreadList::SuspendThreadByPeer(jobject peer,
735                                        bool request_suspension,
736                                        bool debug_suspension,
737                                        bool* timed_out) {
738  const uint64_t start_time = NanoTime();
739  useconds_t sleep_us = kThreadSuspendInitialSleepUs;
740  *timed_out = false;
741  Thread* const self = Thread::Current();
742  Thread* suspended_thread = nullptr;
743  VLOG(threads) << "SuspendThreadByPeer starting";
744  while (true) {
745    Thread* thread;
746    {
747      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
748      // is requesting another suspend, to avoid deadlock, by requiring this function be called
749      // holding Locks::thread_list_suspend_thread_lock_. Its important this thread suspend rather
750      // than request thread suspension, to avoid potential cycles in threads requesting each other
751      // suspend.
752      ScopedObjectAccess soa(self);
753      MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
754      thread = Thread::FromManagedThread(soa, peer);
755      if (thread == nullptr) {
756        if (suspended_thread != nullptr) {
757          MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
758          // If we incremented the suspend count but the thread reset its peer, we need to
759          // re-decrement it since it is shutting down and may deadlock the runtime in
760          // ThreadList::WaitForOtherNonDaemonThreadsToExit.
761          suspended_thread->ModifySuspendCount(soa.Self(), -1, nullptr, debug_suspension);
762        }
763        ThreadSuspendByPeerWarning(self, WARNING, "No such thread for suspend", peer);
764        return nullptr;
765      }
766      if (!Contains(thread)) {
767        CHECK(suspended_thread == nullptr);
768        VLOG(threads) << "SuspendThreadByPeer failed for unattached thread: "
769            << reinterpret_cast<void*>(thread);
770        return nullptr;
771      }
772      VLOG(threads) << "SuspendThreadByPeer found thread: " << *thread;
773      {
774        MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
775        if (request_suspension) {
776          if (self->GetSuspendCount() > 0) {
777            // We hold the suspend count lock but another thread is trying to suspend us. Its not
778            // safe to try to suspend another thread in case we get a cycle. Start the loop again
779            // which will allow this thread to be suspended.
780            continue;
781          }
782          CHECK(suspended_thread == nullptr);
783          suspended_thread = thread;
784          suspended_thread->ModifySuspendCount(self, +1, nullptr, debug_suspension);
785          request_suspension = false;
786        } else {
787          // If the caller isn't requesting suspension, a suspension should have already occurred.
788          CHECK_GT(thread->GetSuspendCount(), 0);
789        }
790        // IsSuspended on the current thread will fail as the current thread is changed into
791        // Runnable above. As the suspend count is now raised if this is the current thread
792        // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
793        // to just explicitly handle the current thread in the callers to this code.
794        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
795        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
796        // count, or else we've waited and it has self suspended) or is the current thread, we're
797        // done.
798        if (thread->IsSuspended()) {
799          VLOG(threads) << "SuspendThreadByPeer thread suspended: " << *thread;
800          if (ATRACE_ENABLED()) {
801            std::string name;
802            thread->GetThreadName(name);
803            ATRACE_BEGIN(StringPrintf("SuspendThreadByPeer suspended %s for peer=%p", name.c_str(),
804                                      peer).c_str());
805          }
806          return thread;
807        }
808        const uint64_t total_delay = NanoTime() - start_time;
809        if (total_delay >= MsToNs(kThreadSuspendTimeoutMs)) {
810          ThreadSuspendByPeerWarning(self, FATAL, "Thread suspension timed out", peer);
811          if (suspended_thread != nullptr) {
812            CHECK_EQ(suspended_thread, thread);
813            suspended_thread->ModifySuspendCount(soa.Self(), -1, nullptr, debug_suspension);
814          }
815          *timed_out = true;
816          return nullptr;
817        } else if (sleep_us == 0 &&
818            total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
819          // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
820          // excessive CPU usage.
821          sleep_us = kThreadSuspendMaxYieldUs / 2;
822        }
823      }
824      // Release locks and come out of runnable state.
825    }
826    VLOG(threads) << "SuspendThreadByPeer waiting to allow thread chance to suspend";
827    ThreadSuspendSleep(sleep_us);
828    // This may stay at 0 if sleep_us == 0, but this is WAI since we want to avoid using usleep at
829    // all if possible. This shouldn't be an issue since time to suspend should always be small.
830    sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
831  }
832}
833
834static void ThreadSuspendByThreadIdWarning(LogSeverity severity,
835                                           const char* message,
836                                           uint32_t thread_id) {
837  LOG(severity) << StringPrintf("%s: %d", message, thread_id);
838}
839
840Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id,
841                                            bool debug_suspension,
842                                            bool* timed_out) {
843  const uint64_t start_time = NanoTime();
844  useconds_t sleep_us = kThreadSuspendInitialSleepUs;
845  *timed_out = false;
846  Thread* suspended_thread = nullptr;
847  Thread* const self = Thread::Current();
848  CHECK_NE(thread_id, kInvalidThreadId);
849  VLOG(threads) << "SuspendThreadByThreadId starting";
850  while (true) {
851    {
852      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
853      // is requesting another suspend, to avoid deadlock, by requiring this function be called
854      // holding Locks::thread_list_suspend_thread_lock_. Its important this thread suspend rather
855      // than request thread suspension, to avoid potential cycles in threads requesting each other
856      // suspend.
857      ScopedObjectAccess soa(self);
858      MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
859      Thread* thread = nullptr;
860      for (const auto& it : list_) {
861        if (it->GetThreadId() == thread_id) {
862          thread = it;
863          break;
864        }
865      }
866      if (thread == nullptr) {
867        CHECK(suspended_thread == nullptr) << "Suspended thread " << suspended_thread
868            << " no longer in thread list";
869        // There's a race in inflating a lock and the owner giving up ownership and then dying.
870        ThreadSuspendByThreadIdWarning(WARNING, "No such thread id for suspend", thread_id);
871        return nullptr;
872      }
873      VLOG(threads) << "SuspendThreadByThreadId found thread: " << *thread;
874      DCHECK(Contains(thread));
875      {
876        MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
877        if (suspended_thread == nullptr) {
878          if (self->GetSuspendCount() > 0) {
879            // We hold the suspend count lock but another thread is trying to suspend us. Its not
880            // safe to try to suspend another thread in case we get a cycle. Start the loop again
881            // which will allow this thread to be suspended.
882            continue;
883          }
884          thread->ModifySuspendCount(self, +1, nullptr, debug_suspension);
885          suspended_thread = thread;
886        } else {
887          CHECK_EQ(suspended_thread, thread);
888          // If the caller isn't requesting suspension, a suspension should have already occurred.
889          CHECK_GT(thread->GetSuspendCount(), 0);
890        }
891        // IsSuspended on the current thread will fail as the current thread is changed into
892        // Runnable above. As the suspend count is now raised if this is the current thread
893        // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
894        // to just explicitly handle the current thread in the callers to this code.
895        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
896        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
897        // count, or else we've waited and it has self suspended) or is the current thread, we're
898        // done.
899        if (thread->IsSuspended()) {
900          if (ATRACE_ENABLED()) {
901            std::string name;
902            thread->GetThreadName(name);
903            ATRACE_BEGIN(StringPrintf("SuspendThreadByThreadId suspended %s id=%d",
904                                      name.c_str(), thread_id).c_str());
905          }
906          VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread;
907          return thread;
908        }
909        const uint64_t total_delay = NanoTime() - start_time;
910        if (total_delay >= MsToNs(kThreadSuspendTimeoutMs)) {
911          ThreadSuspendByThreadIdWarning(WARNING, "Thread suspension timed out", thread_id);
912          if (suspended_thread != nullptr) {
913            thread->ModifySuspendCount(soa.Self(), -1, nullptr, debug_suspension);
914          }
915          *timed_out = true;
916          return nullptr;
917        } else if (sleep_us == 0 &&
918            total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
919          // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent
920          // excessive CPU usage.
921          sleep_us = kThreadSuspendMaxYieldUs / 2;
922        }
923      }
924      // Release locks and come out of runnable state.
925    }
926    VLOG(threads) << "SuspendThreadByThreadId waiting to allow thread chance to suspend";
927    ThreadSuspendSleep(sleep_us);
928    sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);
929  }
930}
931
932Thread* ThreadList::FindThreadByThreadId(uint32_t thread_id) {
933  for (const auto& thread : list_) {
934    if (thread->GetThreadId() == thread_id) {
935      return thread;
936    }
937  }
938  return nullptr;
939}
940
941void ThreadList::SuspendAllForDebugger() {
942  Thread* self = Thread::Current();
943  Thread* debug_thread = Dbg::GetDebugThread();
944
945  VLOG(threads) << *self << " SuspendAllForDebugger starting...";
946
947  SuspendAllInternal(self, self, debug_thread, true);
948  // Block on the mutator lock until all Runnable threads release their share of access then
949  // immediately unlock again.
950#if HAVE_TIMED_RWLOCK
951  // Timeout if we wait more than 30 seconds.
952  if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) {
953    UnsafeLogFatalForThreadSuspendAllTimeout();
954  } else {
955    Locks::mutator_lock_->ExclusiveUnlock(self);
956  }
957#else
958  Locks::mutator_lock_->ExclusiveLock(self);
959  Locks::mutator_lock_->ExclusiveUnlock(self);
960#endif
961  // Disabled for the following race condition:
962  // Thread 1 calls SuspendAllForDebugger, gets preempted after pulsing the mutator lock.
963  // Thread 2 calls SuspendAll and SetStateUnsafe (perhaps from Dbg::Disconnected).
964  // Thread 1 fails assertion that all threads are suspended due to thread 2 being in a runnable
965  // state (from SetStateUnsafe).
966  // AssertThreadsAreSuspended(self, self, debug_thread);
967
968  VLOG(threads) << *self << " SuspendAllForDebugger complete";
969}
970
971void ThreadList::SuspendSelfForDebugger() {
972  Thread* const self = Thread::Current();
973  self->SetReadyForDebugInvoke(true);
974
975  // The debugger thread must not suspend itself due to debugger activity!
976  Thread* debug_thread = Dbg::GetDebugThread();
977  CHECK(self != debug_thread);
978  CHECK_NE(self->GetState(), kRunnable);
979  Locks::mutator_lock_->AssertNotHeld(self);
980
981  // The debugger may have detached while we were executing an invoke request. In that case, we
982  // must not suspend ourself.
983  DebugInvokeReq* pReq = self->GetInvokeReq();
984  const bool skip_thread_suspension = (pReq != nullptr && !Dbg::IsDebuggerActive());
985  if (!skip_thread_suspension) {
986    // Collisions with other suspends aren't really interesting. We want
987    // to ensure that we're the only one fiddling with the suspend count
988    // though.
989    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
990    self->ModifySuspendCount(self, +1, nullptr, true);
991    CHECK_GT(self->GetSuspendCount(), 0);
992
993    VLOG(threads) << *self << " self-suspending (debugger)";
994  } else {
995    // We must no longer be subject to debugger suspension.
996    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
997    CHECK_EQ(self->GetDebugSuspendCount(), 0) << "Debugger detached without resuming us";
998
999    VLOG(threads) << *self << " not self-suspending because debugger detached during invoke";
1000  }
1001
1002  // If the debugger requested an invoke, we need to send the reply and clear the request.
1003  if (pReq != nullptr) {
1004    Dbg::FinishInvokeMethod(pReq);
1005    self->ClearDebugInvokeReq();
1006    pReq = nullptr;  // object has been deleted, clear it for safety.
1007  }
1008
1009  // Tell JDWP that we've completed suspension. The JDWP thread can't
1010  // tell us to resume before we're fully asleep because we hold the
1011  // suspend count lock.
1012  Dbg::ClearWaitForEventThread();
1013
1014  {
1015    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
1016    while (self->GetSuspendCount() != 0) {
1017      Thread::resume_cond_->Wait(self);
1018      if (self->GetSuspendCount() != 0) {
1019        // The condition was signaled but we're still suspended. This
1020        // can happen when we suspend then resume all threads to
1021        // update instrumentation or compute monitor info. This can
1022        // also happen if the debugger lets go while a SIGQUIT thread
1023        // dump event is pending (assuming SignalCatcher was resumed for
1024        // just long enough to try to grab the thread-suspend lock).
1025        VLOG(jdwp) << *self << " still suspended after undo "
1026                   << "(suspend count=" << self->GetSuspendCount() << ", "
1027                   << "debug suspend count=" << self->GetDebugSuspendCount() << ")";
1028      }
1029    }
1030    CHECK_EQ(self->GetSuspendCount(), 0);
1031  }
1032
1033  self->SetReadyForDebugInvoke(false);
1034  VLOG(threads) << *self << " self-reviving (debugger)";
1035}
1036
1037void ThreadList::ResumeAllForDebugger() {
1038  Thread* self = Thread::Current();
1039  Thread* debug_thread = Dbg::GetDebugThread();
1040
1041  VLOG(threads) << *self << " ResumeAllForDebugger starting...";
1042
1043  // Threads can't resume if we exclusively hold the mutator lock.
1044  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
1045
1046  {
1047    MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
1048    {
1049      MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
1050      // Update global suspend all state for attaching threads.
1051      DCHECK_GE(suspend_all_count_, debug_suspend_all_count_);
1052      if (debug_suspend_all_count_ > 0) {
1053        --suspend_all_count_;
1054        --debug_suspend_all_count_;
1055      } else {
1056        // We've been asked to resume all threads without being asked to
1057        // suspend them all before. That may happen if a debugger tries
1058        // to resume some suspended threads (with suspend count == 1)
1059        // at once with a VirtualMachine.Resume command. Let's print a
1060        // warning.
1061        LOG(WARNING) << "Debugger attempted to resume all threads without "
1062                     << "having suspended them all before.";
1063      }
1064      // Decrement everybody's suspend count (except our own).
1065      for (const auto& thread : list_) {
1066        if (thread == self || thread == debug_thread) {
1067          continue;
1068        }
1069        if (thread->GetDebugSuspendCount() == 0) {
1070          // This thread may have been individually resumed with ThreadReference.Resume.
1071          continue;
1072        }
1073        VLOG(threads) << "requesting thread resume: " << *thread;
1074        thread->ModifySuspendCount(self, -1, nullptr, true);
1075      }
1076    }
1077  }
1078
1079  {
1080    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
1081    Thread::resume_cond_->Broadcast(self);
1082  }
1083
1084  VLOG(threads) << *self << " ResumeAllForDebugger complete";
1085}
1086
1087void ThreadList::UndoDebuggerSuspensions() {
1088  Thread* self = Thread::Current();
1089
1090  VLOG(threads) << *self << " UndoDebuggerSuspensions starting";
1091
1092  {
1093    MutexLock mu(self, *Locks::thread_list_lock_);
1094    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
1095    // Update global suspend all state for attaching threads.
1096    suspend_all_count_ -= debug_suspend_all_count_;
1097    debug_suspend_all_count_ = 0;
1098    // Update running threads.
1099    for (const auto& thread : list_) {
1100      if (thread == self || thread->GetDebugSuspendCount() == 0) {
1101        continue;
1102      }
1103      thread->ModifySuspendCount(self, -thread->GetDebugSuspendCount(), nullptr, true);
1104    }
1105  }
1106
1107  {
1108    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
1109    Thread::resume_cond_->Broadcast(self);
1110  }
1111
1112  VLOG(threads) << "UndoDebuggerSuspensions(" << *self << ") complete";
1113}
1114
1115void ThreadList::WaitForOtherNonDaemonThreadsToExit() {
1116  ScopedTrace trace(__PRETTY_FUNCTION__);
1117  Thread* self = Thread::Current();
1118  Locks::mutator_lock_->AssertNotHeld(self);
1119  while (true) {
1120    {
1121      // No more threads can be born after we start to shutdown.
1122      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
1123      CHECK(Runtime::Current()->IsShuttingDownLocked());
1124      CHECK_EQ(Runtime::Current()->NumberOfThreadsBeingBorn(), 0U);
1125    }
1126    MutexLock mu(self, *Locks::thread_list_lock_);
1127    // Also wait for any threads that are unregistering to finish. This is required so that no
1128    // threads access the thread list after it is deleted. TODO: This may not work for user daemon
1129    // threads since they could unregister at the wrong time.
1130    bool done = unregistering_count_ == 0;
1131    if (done) {
1132      for (const auto& thread : list_) {
1133        if (thread != self && !thread->IsDaemon()) {
1134          done = false;
1135          break;
1136        }
1137      }
1138    }
1139    if (done) {
1140      break;
1141    }
1142    // Wait for another thread to exit before re-checking.
1143    Locks::thread_exit_cond_->Wait(self);
1144  }
1145}
1146
1147void ThreadList::SuspendAllDaemonThreadsForShutdown() {
1148  ScopedTrace trace(__PRETTY_FUNCTION__);
1149  Thread* self = Thread::Current();
1150  MutexLock mu(self, *Locks::thread_list_lock_);
1151  size_t daemons_left = 0;
1152  {  // Tell all the daemons it's time to suspend.
1153    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
1154    for (const auto& thread : list_) {
1155      // This is only run after all non-daemon threads have exited, so the remainder should all be
1156      // daemons.
1157      CHECK(thread->IsDaemon()) << *thread;
1158      if (thread != self) {
1159        thread->ModifySuspendCount(self, +1, nullptr, false);
1160        ++daemons_left;
1161      }
1162      // We are shutting down the runtime, set the JNI functions of all the JNIEnvs to be
1163      // the sleep forever one.
1164      thread->GetJniEnv()->SetFunctionsToRuntimeShutdownFunctions();
1165    }
1166  }
1167  // If we have any daemons left, wait 200ms to ensure they are not stuck in a place where they
1168  // are about to access runtime state and are not in a runnable state. Examples: Monitor code
1169  // or waking up from a condition variable. TODO: Try and see if there is a better way to wait
1170  // for daemon threads to be in a blocked state.
1171  if (daemons_left > 0) {
1172    static constexpr size_t kDaemonSleepTime = 200 * 1000;
1173    usleep(kDaemonSleepTime);
1174  }
1175  // Give the threads a chance to suspend, complaining if they're slow.
1176  bool have_complained = false;
1177  static constexpr size_t kTimeoutMicroseconds = 2000 * 1000;
1178  static constexpr size_t kSleepMicroseconds = 1000;
1179  for (size_t i = 0; i < kTimeoutMicroseconds / kSleepMicroseconds; ++i) {
1180    bool all_suspended = true;
1181    for (const auto& thread : list_) {
1182      if (thread != self && thread->GetState() == kRunnable) {
1183        if (!have_complained) {
1184          LOG(WARNING) << "daemon thread not yet suspended: " << *thread;
1185          have_complained = true;
1186        }
1187        all_suspended = false;
1188      }
1189    }
1190    if (all_suspended) {
1191      return;
1192    }
1193    usleep(kSleepMicroseconds);
1194  }
1195  LOG(WARNING) << "timed out suspending all daemon threads";
1196}
1197
1198void ThreadList::Register(Thread* self) {
1199  DCHECK_EQ(self, Thread::Current());
1200
1201  if (VLOG_IS_ON(threads)) {
1202    std::ostringstream oss;
1203    self->ShortDump(oss);  // We don't hold the mutator_lock_ yet and so cannot call Dump.
1204    LOG(INFO) << "ThreadList::Register() " << *self  << "\n" << oss.str();
1205  }
1206
1207  // Atomically add self to the thread list and make its thread_suspend_count_ reflect ongoing
1208  // SuspendAll requests.
1209  MutexLock mu(self, *Locks::thread_list_lock_);
1210  MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
1211  CHECK_GE(suspend_all_count_, debug_suspend_all_count_);
1212  // Modify suspend count in increments of 1 to maintain invariants in ModifySuspendCount. While
1213  // this isn't particularly efficient the suspend counts are most commonly 0 or 1.
1214  for (int delta = debug_suspend_all_count_; delta > 0; delta--) {
1215    self->ModifySuspendCount(self, +1, nullptr, true);
1216  }
1217  for (int delta = suspend_all_count_ - debug_suspend_all_count_; delta > 0; delta--) {
1218    self->ModifySuspendCount(self, +1, nullptr, false);
1219  }
1220  CHECK(!Contains(self));
1221  list_.push_back(self);
1222  if (kUseReadBarrier) {
1223    // Initialize according to the state of the CC collector.
1224    bool is_gc_marking =
1225        Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsMarking();
1226    self->SetIsGcMarking(is_gc_marking);
1227    bool weak_ref_access_enabled =
1228        Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsWeakRefAccessEnabled();
1229    self->SetWeakRefAccessEnabled(weak_ref_access_enabled);
1230  }
1231}
1232
1233void ThreadList::Unregister(Thread* self) {
1234  DCHECK_EQ(self, Thread::Current());
1235  CHECK_NE(self->GetState(), kRunnable);
1236  Locks::mutator_lock_->AssertNotHeld(self);
1237
1238  VLOG(threads) << "ThreadList::Unregister() " << *self;
1239
1240  {
1241    MutexLock mu(self, *Locks::thread_list_lock_);
1242    ++unregistering_count_;
1243  }
1244
1245  // Any time-consuming destruction, plus anything that can call back into managed code or
1246  // suspend and so on, must happen at this point, and not in ~Thread. The self->Destroy is what
1247  // causes the threads to join. It is important to do this after incrementing unregistering_count_
1248  // since we want the runtime to wait for the daemon threads to exit before deleting the thread
1249  // list.
1250  self->Destroy();
1251
1252  // If tracing, remember thread id and name before thread exits.
1253  Trace::StoreExitingThreadInfo(self);
1254
1255  uint32_t thin_lock_id = self->GetThreadId();
1256  while (true) {
1257    // Remove and delete the Thread* while holding the thread_list_lock_ and
1258    // thread_suspend_count_lock_ so that the unregistering thread cannot be suspended.
1259    // Note: deliberately not using MutexLock that could hold a stale self pointer.
1260    MutexLock mu(self, *Locks::thread_list_lock_);
1261    if (!Contains(self)) {
1262      std::string thread_name;
1263      self->GetThreadName(thread_name);
1264      std::ostringstream os;
1265      DumpNativeStack(os, GetTid(), nullptr, "  native: ", nullptr);
1266      LOG(ERROR) << "Request to unregister unattached thread " << thread_name << "\n" << os.str();
1267      break;
1268    } else {
1269      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
1270      if (!self->IsSuspended()) {
1271        list_.remove(self);
1272        break;
1273      }
1274    }
1275    // We failed to remove the thread due to a suspend request, loop and try again.
1276  }
1277  delete self;
1278
1279  // Release the thread ID after the thread is finished and deleted to avoid cases where we can
1280  // temporarily have multiple threads with the same thread id. When this occurs, it causes
1281  // problems in FindThreadByThreadId / SuspendThreadByThreadId.
1282  ReleaseThreadId(nullptr, thin_lock_id);
1283
1284  // Clear the TLS data, so that the underlying native thread is recognizably detached.
1285  // (It may wish to reattach later.)
1286#ifdef __ANDROID__
1287  __get_tls()[TLS_SLOT_ART_THREAD_SELF] = nullptr;
1288#else
1289  CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, nullptr), "detach self");
1290#endif
1291
1292  // Signal that a thread just detached.
1293  MutexLock mu(nullptr, *Locks::thread_list_lock_);
1294  --unregistering_count_;
1295  Locks::thread_exit_cond_->Broadcast(nullptr);
1296}
1297
1298void ThreadList::ForEach(void (*callback)(Thread*, void*), void* context) {
1299  for (const auto& thread : list_) {
1300    callback(thread, context);
1301  }
1302}
1303
1304void ThreadList::VisitRoots(RootVisitor* visitor) const {
1305  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
1306  for (const auto& thread : list_) {
1307    thread->VisitRoots(visitor);
1308  }
1309}
1310
1311uint32_t ThreadList::AllocThreadId(Thread* self) {
1312  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
1313  for (size_t i = 0; i < allocated_ids_.size(); ++i) {
1314    if (!allocated_ids_[i]) {
1315      allocated_ids_.set(i);
1316      return i + 1;  // Zero is reserved to mean "invalid".
1317    }
1318  }
1319  LOG(FATAL) << "Out of internal thread ids";
1320  return 0;
1321}
1322
1323void ThreadList::ReleaseThreadId(Thread* self, uint32_t id) {
1324  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
1325  --id;  // Zero is reserved to mean "invalid".
1326  DCHECK(allocated_ids_[id]) << id;
1327  allocated_ids_.reset(id);
1328}
1329
1330ScopedSuspendAll::ScopedSuspendAll(const char* cause, bool long_suspend) {
1331  Runtime::Current()->GetThreadList()->SuspendAll(cause, long_suspend);
1332}
1333
1334ScopedSuspendAll::~ScopedSuspendAll() {
1335  Runtime::Current()->GetThreadList()->ResumeAll();
1336}
1337
1338}  // namespace art
1339